<a href="https://colab.research.google.com/github/SwordForShinobi/Retail-recommender-system/blob/main/RecSys_LightFM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Pure collaborative filtering, i.e. without user or item features. Experimentally it gave the best ROC-AUC score on the Instacart case, so it is the only model we use for the final predictions.

In [None]:
!pip install lightfm

import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from lightfm import LightFM

# 'Backend' functions
Just run this cell: it only defines the helper functions used by the pipeline below.

In [3]:
def get_user_list(df, user_column):
    """Return the distinct users of ``df`` in ascending order.

    Args:
        df: DataFrame that holds the user column.
        user_column: name of the column containing the user identifiers.

    Returns:
        Sorted array with one entry per distinct value of ``df[user_column]``.
    """
    distinct_users = df[user_column].unique()
    return np.sort(distinct_users)

def get_item_list(df, item_name_column):
    """Collect the distinct items found in one column of ``df``.

    Args:
        df: DataFrame that holds the item column.
        item_name_column: name of the column containing the items.

    Returns:
        Array of the unique values of ``df[item_name_column]``, in order of
        first appearance (the result is not sorted).
    """
    return df[item_name_column].unique()


def id_mappings(user_list, item_list):
    """Build lookup tables between raw ids and consecutive integer indices.

    Each user and each item is numbered by its position in the given
    sequence (starting at 0).

    Args:
        user_list: iterable of user ids.
        item_list: iterable of item ids.

    Returns:
        A 4-tuple ``(user_to_index_mapping, index_to_user_mapping,
        item_to_index_mapping, index_to_item_mapping)`` of dicts.
    """
    # Enumerate each input exactly once so one-shot iterables also work; the
    # reverse dict is derived from the forward one, so a duplicated id maps to
    # its last position, exactly as with sequential assignment.
    index_to_user_mapping = dict(enumerate(user_list))
    user_to_index_mapping = {
        user_id: user_index for user_index, user_id in index_to_user_mapping.items()
    }

    index_to_item_mapping = dict(enumerate(item_list))
    item_to_index_mapping = {
        item_id: item_index for item_index, item_id in index_to_item_mapping.items()
    }

    # Parenthesised tuple instead of backslash continuations: a trailing "\"
    # silently swallows whatever line happens to follow the return statement.
    return (user_to_index_mapping, index_to_user_mapping,
            item_to_index_mapping, index_to_item_mapping)


def get_user_product_interaction(user_to_product):
    """Count how often each (user_id, product_id) pair occurs.

    Args:
        user_to_product: DataFrame with at least the columns ``user_id`` and
            ``product_id``; every row is counted as one interaction.

    Returns:
        DataFrame with the columns ``user_id``, ``product_id`` and
        ``product_count``, one row per distinct pair.
    """
    # assign() works on a copy, so the caller's frame is not given an extra
    # "product_count" column as a side effect.
    counted = user_to_product.assign(product_count = 1)
    user_to_product_rating_train = counted.groupby(
        ["user_id", "product_id"], as_index = False
    )["product_count"].sum()

    return user_to_product_rating_train
# The key point: the rows and columns of the matrix must be the integer *indices*
# of users and products (not the raw ids from the dataframe) - only then does the
# library interpret the matrix correctly.
def get_interaction_matrix(df, df_column_as_row, df_column_as_col, df_column_as_value, row_indexing_map,
                          col_indexing_map):
    """Build a sparse COO matrix from three columns of ``df``.

    Args:
        df: DataFrame with one row per matrix entry.
        df_column_as_row: column whose values select the matrix row.
        df_column_as_col: column whose values select the matrix column.
        df_column_as_value: column holding the entry values.
        row_indexing_map: mapping from row label to integer row position.
        col_indexing_map: mapping from column label to integer column position.

    Returns:
        ``coo_matrix`` of shape ``(len(row_indexing_map), len(col_indexing_map))``.

    Raises:
        KeyError: if a label in ``df`` is missing from its indexing map.
    """
    def to_positions(column_name, indexing_map):
        # Translate every label of the column into its integer matrix position.
        return df[column_name].apply(lambda label: indexing_map[label]).values

    row_positions = to_positions(df_column_as_row, row_indexing_map)
    col_positions = to_positions(df_column_as_col, col_indexing_map)
    cell_values = df[df_column_as_value].values
    matrix_shape = (len(row_indexing_map), len(col_indexing_map))

    return coo_matrix((cell_values, (row_positions, col_positions)), shape = matrix_shape)


# Pipeline itself

In [5]:
# Load the transaction log. 'your_directory' is a placeholder: replace it with
# the path to Data/transactions_probe.csv from the main branch of this repository.
# Later cells read the "user_id" and "product_id" columns of this frame.
trans = pd.read_csv('your_directory') # read file from this repository main/Data/transactions_probe.csv

In [6]:
# Distinct users (sorted) and distinct products found in the transactions.
users = get_user_list(df = trans, user_column = "user_id")
items = get_item_list(df = trans, item_name_column = "product_id")

In [7]:
# Lookup tables between raw ids and the integer positions used by the matrix.
(
    user_to_index_mapping,
    index_to_user_mapping,
    item_to_index_mapping,
    index_to_item_mapping,
) = id_mappings(users, items)

In [8]:
# Aggregate the raw transactions into one row per (user_id, product_id) pair,
# with "product_count" holding how many times that pair occurs.
user_to_product_rating_train = get_user_product_interaction(trans)

In [9]:
# Sparse user x product matrix whose entries are the purchase counts.
user_to_product_interaction_train = get_interaction_matrix(
    df = user_to_product_rating_train,
    df_column_as_row = "user_id",
    df_column_as_col = "product_id",
    df_column_as_value = "product_count",
    row_indexing_map = user_to_index_mapping,
    col_indexing_map = item_to_index_mapping,
)

In [10]:
# Fit a pure collaborative-filtering model (no user or item features) with the
# WARP loss on the user x product interaction matrix.
# random_state fixes the seed so a Restart & Run All starts from the same
# initialisation; NOTE(review): with num_threads > 1 the fit may still vary
# slightly between runs - set num_threads=1 if exact reproducibility matters.
model_without_features = LightFM(loss = "warp", random_state = 42)
model_without_features.fit(user_to_product_interaction_train,
          user_features=None, 
          item_features=None, 
          sample_weight=None, 
          epochs=10, 
          num_threads=4,
          verbose=1)

Epoch: 100%|██████████| 10/10 [00:00<00:00, 925.51it/s]


<lightfm.lightfm.LightFM at 0x7fb4a53e7650>

In [22]:
class recommendation_sampling:
    """Top-N product recommendations from a fitted model.

    NOTE: the constructor defaults (``items``,
    ``user_to_product_interaction_train`` and ``user_to_index_mapping``) are
    notebook-level objects that Python binds once, when this cell is executed.
    Re-run this cell after rebuilding them, or pass them explicitly.

    Attributes:
        model: object exposing ``predict(user_ids=..., item_ids=...)`` that
            returns one score per requested item.
        items: array of product ids; ``items[i]`` is used as the label of
            matrix column ``i``.
        user_to_product_interaction_matrix: sparse user x product matrix.
        user2index_map: dict from raw user id to matrix row index.
    """

    def __init__(self, model, items = items, user_to_product_interaction_matrix = user_to_product_interaction_train, 
                user2index_map = user_to_index_mapping):
        self.user_to_product_interaction_matrix = user_to_product_interaction_matrix
        self.model = model
        # Items are selected with integer index arrays below, which needs an
        # ndarray; asarray returns an ndarray argument unchanged.
        self.items = np.asarray(items)
        self.user2index_map = user2index_map

    def _all_item_indices(self):
        """Return the column index of every product in the interaction matrix."""
        return np.arange(self.user_to_product_interaction_matrix.shape[1])

    def _top_items(self, userindex, top_n, item_ids):
        """Return the ``top_n`` best-scored product ids for one matrix row index."""
        scores = self.model.predict(user_ids = int(userindex), item_ids = item_ids)
        # Sort descending by score and keep only the leading positions.
        return self.items[np.argsort(-scores)[:top_n]]

    def recommendation_for_user(self, user, top_n = 10):
        """Print the known purchases and the recommendations of one user.

        Args:
            user: raw user id (a key of ``user2index_map``).
            top_n: maximum number of known positives and of recommendations
                to print.

        Returns:
            None. Nothing is printed when ``user`` is unknown.
        """
        userindex = self.user2index_map.get(user)
        if userindex is None:
            return None

        # Products already bought: the stored columns of the user's matrix row.
        bought_columns = self.user_to_product_interaction_matrix.tocsr()[userindex].indices
        known_positives = self.items[bought_columns]

        top_items = self._top_items(userindex, top_n, self._all_item_indices())

        print("User %s" % user)
        print("     Known positives:")
        for x in known_positives[:top_n]:
            print("                  %s" % x)

        print("     Recommended:")
        for x in top_items:
            print("                  %s" % x)

    def recommendation_for_many(self, users, top_n = 10):
        """Return the recommendation list of every user in ``users``.

        Args:
            users: iterable of raw user ids.
            top_n: number of products to recommend per user.

        Returns:
            List aligned with ``users``: each entry is a list of up to
            ``top_n`` product ids, or ``None`` for a user that is missing
            from ``user2index_map`` (same convention as
            ``recommendation_for_user``, instead of failing on ``int(None)``).
        """
        # The set of candidate items is the same for every user: build it once.
        item_ids = self._all_item_indices()
        predictions = []

        for user in users:
            userindex = self.user2index_map.get(user)
            if userindex is None:
                predictions.append(None)
                continue
            predictions.append(self._top_items(userindex, top_n, item_ids).tolist())

        return predictions

In [23]:
# Wrap the fitted model; the items, the interaction matrix and the
# user-to-index map are taken from the constructor defaults.
recom = recommendation_sampling(model = model_without_features)

In [24]:
# Sanity check: print known purchases and recommended products for user id 3.
recom.recommendation_for_user(3)

User 3
     Known positives:
                  47766
                  16797
                  9387
                  17668
                  15143
                  39190
                  21903
                  39922
                  24810
                  32402
     Recommended:
                  16797
                  23650
                  38596
                  42265
                  39190
                  42557
                  16965
                  49683
                  9387
                  24810


In [25]:
# One recommendation list per distinct user in the transactions, in the order
# returned by unique().
predicts = recom.recommendation_for_many(trans['user_id'].unique())

In [30]:
# Serialise each user's recommendations as one space-separated string.
# Joining the items directly (instead of stripping "[", "]" and "," from the
# list's repr) leaves no quote characters around string ids and cannot mangle
# ids that themselves contain those characters.
clear_items_list = [
    " ".join(str(item) for item in user_items)
    for user_items in predicts
]

# Same user order as the one used to build `predicts`.
user_ids = trans['user_id'].unique()
answer = pd.DataFrame({'user_id': user_ids, 'product_id': clear_items_list})

In [31]:
# Display the result: one row per user, "product_id" holding the
# space-separated recommended product ids.
answer

Unnamed: 0,user_id,product_id
0,1,13176 16797 196 42265 49235 39657 47766 13032 ...
1,2,12000 37646 9124 30489 45948 32139 33276 46676...
2,3,16797 23650 38596 42265 39190 42557 16965 4968...
3,7,29871 42828 10895 33740 19019 10504 31683 1967...
4,13,4210 31372 27435 41926 5618 41351 32850 44142 ...
5,14,11348 10549 4489 27862 17923 43127 12168 9076 ...
6,15,42265 196 13176 49235 11266 48142 1747 49683 3...
7,17,15820 30992 9641 46844 9387 36463 16797 7350 4...
