In [1]:
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score, precision_at_k

from sklearn.neighbors import NearestNeighbors
import scipy.sparse as scs

import numpy as np
import pandas as pd

import itertools
import os

In [2]:
PATH_TO_DATA = os.path.join()
SEED = 42

In [3]:
interactions = pd.read_csv(os.path.join(PATH_TO_DATA, 'interactions.csv'))
interactions = interactions.rename(columns={'row': 'customer_id', 'col': 'item_id', 'data': 'response'})
interactions.columns = [x.lower() for x in interactions.columns]
#print('interactions')
#print(interactions.head())

#goods
item_asset = pd.read_csv(os.path.join(PATH_TO_DATA, 'item_asset.csv')).drop(['col'], axis=1)
item_asset = item_asset.rename(columns={'row': 'item_id', 'data': 'property_item'})
item_asset.columns = [x.lower() for x in item_asset.columns]
#print('item_asset')
#print(item_asset.head())

item_price = pd.read_csv(os.path.join(PATH_TO_DATA, 'item_price.csv')).drop(['col'], axis=1)
item_price = item_price.rename(columns={'row': 'item_id', 'data': 'price_scaled'})
item_price.columns = [x.lower() for x in item_price.columns]
#print('item_price')
#print(item_price.head())

item_subclass = pd.read_csv(os.path.join(PATH_TO_DATA, 'item_subclass.csv'))
item_subclass = item_subclass.rename(columns={'row': 'item_id', 'col': 'category_num', 'data': 'category_connection'})
item_subclass.columns = [x.lower() for x in item_subclass.columns]
#print('item_subclass')
#print(item_subclass.head())

#users
user_age = pd.read_csv(os.path.join(PATH_TO_DATA, 'user_age.csv')).drop(['col'], axis=1)
user_age = user_age.rename(columns={'row': 'customer_id', 'data': 'age_scaled'})
user_age.columns = [x.lower() for x in user_age.columns]
#print('user_age')
#print(user_age.head())

user_region = pd.read_csv(os.path.join(PATH_TO_DATA, 'user_region.csv'))
user_region = user_region.rename(columns={'row': 'customer_id', 'col': 'region_ohe_position', 'data': 'region_feat'})
user_region.columns = [x.lower() for x in user_region.columns]
#print('user_region')
#print(user_region.head())


In [4]:
print(f'interactions shape: {interactions.shape}')
interactions.head()

interactions shape: (398636, 3)


Unnamed: 0,customer_id,item_id,response
0,0,3568,1.0
1,0,3827,1.0
2,0,4844,1.0
3,0,5734,1.0
4,0,6518,1.0


In [5]:
print(interactions['customer_id'].unique().shape, interactions['item_id'].unique().shape)
print(f'unique responses: {interactions["response"].unique()}')

(27255,) (15277,)
unique responses: [1.]


We can see that this task is about **implicit feedback** (have only one unique values in interactions)

In [6]:
interactions_pivot = interactions.pivot(index='customer_id', columns='item_id', values='response').fillna(0)
flag = False
for i, x in enumerate(interactions_pivot.values.sum(axis=1)):
    if not x:
        flag = True
        print(i)
if not flag:
    print('all users have at least 1 connection')

all users have at least 1 connection


In [7]:
interactions_pivot.head()

item_id,0,2,4,5,6,7,9,10,11,12,...,18485,18486,18487,18488,18489,18490,18491,18492,18493,18494
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Group tables for more convenient work**

In [8]:
print(f'item_asset shape: {item_asset.shape}, item_price shape: {item_price.shape}, \
item_subclass shape: {item_subclass.shape}')

print(f'''unique item_id in item_asset shape: {item_asset['item_id'].unique().shape[0]}, \
unique item_id in item_price shape: {item_price['item_id'].unique().shape[0]}, \
item_subclass shape: {item_subclass['item_id'].unique().shape[0]}''')

item_asset shape: (18490, 2), item_price shape: (18493, 2), item_subclass shape: (18495, 3)
unique item_id in item_asset shape: 18490, unique item_id in item_price shape: 18493, item_subclass shape: 18495


In [9]:
items = pd.merge(item_asset, item_price, how='inner', on='item_id')
items = pd.merge(items, item_subclass, how='inner', on='item_id')
print(f'items shape: {items.shape}')
items.head()

items shape: (18488, 5)


Unnamed: 0,item_id,property_item,price_scaled,category_num,category_connection
0,0,0.009497,0.012911,679,1.0
1,1,0.004226,0.005211,1376,1.0
2,2,0.003371,0.004131,1495,1.0
3,3,0.002991,0.003521,502,1.0
4,4,0.002991,0.003521,502,1.0


We can see that we got a bit less items than there're all in all but due to their small quantity (18495-18488=7) I'll drop them for simplicity.

Of course maybe we should add them in all items and generate their absent features in some way but not today :)

In [10]:
items.isna().sum()

item_id                0
property_item          0
price_scaled           0
category_num           0
category_connection    0
dtype: int64

Cool there're no missing values in items

In [11]:
print(f'user_age shape: {user_age.shape}, user_region shape: {user_region.shape}')
print(f'''unique user_id in user_age: {user_age['customer_id'].unique().shape[0]}, \
      unique iser_id in user_region: {user_region['customer_id'].unique().shape[0]}''')

users = pd.merge(user_age, user_region, how='inner', on='customer_id')
print(f'users shape: {users.shape}')
users.head()

user_age shape: (30317, 2), user_region shape: (26609, 3)
unique user_id in user_age: 30317,       unique iser_id in user_region: 26375
users shape: (25781, 4)


Unnamed: 0,customer_id,age_scaled,region_ohe_position,region_feat
0,2,1.0,5,1.0
1,7,1.0,6,1.0
2,10,1.0,7,1.0
3,12,1.0,7,1.0
4,13,1.0,7,1.0


In [12]:
users.isna().sum()

customer_id            0
age_scaled             0
region_ohe_position    0
region_feat            0
dtype: int64

Cool there're no missing values in users

### Little preprocessing

In [13]:
users_interact_id = set(interactions['customer_id'].unique())
users_info_id = set(users['customer_id'].unique())
print(f' Cnt cold users: {len(users_info_id - users_interact_id)}, \
cnt warm users without features: {len(users_interact_id - users_info_id)}')

items_interact_id = set(interactions['item_id'].unique())
items_info_id = set(items['item_id'].unique())
print(f' Cnt cold items: {len(items_info_id - items_interact_id)}, \
cnt warm items without features: {len(items_interact_id - items_info_id)}')

 Cnt cold users: 3027, cnt warm users without features: 4501
 Cnt cold items: 3217, cnt warm items without features: 6


**Get cold users and items apart and create recommendations for cold users**

Cold items are useless for metrics maximization so how treat them depends on real situation and business case but for now let's just keep them apart.

Cold users have to get rankings of items. One of the popular case is recommending popular items (also we can add some random items just to gather info). I'll use this approach, it's also good due to quite bad user's features due to them we cannot create good recommender system giving predictions based on user's contents

In [14]:
users_cold = users_info_id - users_interact_id
items_cold = items_info_id - items_interact_id 

In [16]:
interactions_pivot.to_numpy().sum(axis=1).shape

(27255,)

In [17]:
popular_items = interactions_pivot.to_numpy().sum(axis=0)
popular_items_top_10 = np.argsort(popular_items)[:-11:-1]
popular_items_top_10

array([14846,  6939,  4217,  8214,  3873,   155,  7211,  8421,  3337,
        3660])

In [18]:
#for cold users take popular items
cold_users_recommendation = {}
for user in users_cold:
    cold_users_recommendation[user] = list(popular_items_top_10)

In [19]:
#drop cold users and items
users = users[~users['customer_id'].isin(users_cold)]
items = items[~items['item_id'].isin(items_cold)]

Here's warm users without features.
<br>
Let's use ANN to find k nearest neighbors and take their features for warm users without them (if k=1 then it's clear, else if k > 1 then we can aggregate feature's values in some ways)

The same is for items

### Approximate NN

In [20]:
def get_nearest_neighbors(train, test, k_neigh=1, id_name='user_id'):
    '''
    Fit NearestNeighbors model and return dataframe with neighbors
    
    Parameters:
        train - numpy array to be fitted
        test - numpy array for which neighbors are searched.
        !! the 1-st column of train and test is obliged to be 'user_id' or smth like that !!
        k_neigh - number of neighbors to be found
        id_name - name of objects (like 'users_id', 'item_id' etc)
        
    Return dataframe of id, number of neigh, id of neigh (from train), dist to neigh
    '''
    
    #fit model
    knn = NearestNeighbors(k_neigh, n_jobs=-1, leaf_size=K_NEIGH*20, algorithm='kd_tree')
    knn.fit(train[:, 1:])
    
    #get dist and indxs of neighbors
    dist, indxs = knn.kneighbors(test[:, 1:], k_neigh)
    
    ##get neighbors##
    #get id from train which are neighbors to id in test
    kneighbors = train[:,0][indxs]
    
    #create a convenient form to work with
    cold_neighbors = np.hstack([test[:, 0].reshape(-1,1), kneighbors])
    df = pd.DataFrame(cold_neighbors, columns=[id_name] + [i for i in range(1, k_neigh+1)])
    
    #make some columns as rows (it allows to join in a more convenient way furthere on need)
    df_unstack = df.melt(id_vars=[id_name], value_name=f'{id_name}_neigh', var_name='number_neigh')
    df_unstack = df_unstack.sort_values([id_name, 'number_neigh']).reset_index(drop=True)
    
    #put distance to neighbors in dataframe
    df_unstack['dist_neigh'] = dist.reshape(-1, 1)
    
    return df_unstack

**Users**

In [21]:
train = interactions_pivot[~(interactions_pivot.index.isin(users_interact_id - users_info_id))]
#keep users_id
train_id = np.array(train.index)

train = train.to_numpy()
#add users_id to get abonents furthere
train = np.hstack([train_id.reshape(-1, 1), train])

In [22]:
test = interactions_pivot[(interactions_pivot.index.isin(users_interact_id - users_info_id))]
#keep users_id
test_id = np.array(test.index)
test = test.to_numpy()

#add users_id to get abonents furthere
test = np.hstack([test_id.reshape(-1, 1), test])

In [23]:
%%time
#let's now use the simplest version and find 1 neighbor
K_NEIGH = 1
users_neigh = get_nearest_neighbors(train, test, K_NEIGH, 'customer_id')

CPU times: user 2h 3min 48s, sys: 1min 11s, total: 2h 4min 59s
Wall time: 3min 19s


In [24]:
print(users_neigh.shape)
users_neigh.head(3)

(4501, 4)


Unnamed: 0,customer_id,number_neigh,customer_id_neigh,dist_neigh
0,0.0,1,18488.0,2.236068
1,1.0,1,6898.0,2.236068
2,3.0,1,432.0,1.414214


**Filling features for users without features**

Let's fill users table with features for those users and items that hadn't features before

In [25]:
users_neigh['customer_id'] = users_neigh['customer_id'].astype(int)
users_neigh['customer_id_neigh'] = users_neigh['customer_id_neigh'].astype(int)

In [26]:
#get features for users without them based of ther neighbors features
users_temp = pd.merge(users, users_neigh, left_on='customer_id', right_on='customer_id_neigh')
users_temp = users_temp.drop(['customer_id_x', 'number_neigh', 'dist_neigh', 'customer_id_neigh'], axis=1)
users_temp = users_temp.rename(columns={'customer_id_y': 'customer_id'})
users_temp.head(2)

Unnamed: 0,age_scaled,region_ohe_position,region_feat,customer_id
0,1.0,6,1.0,6460
1,1.0,7,1.0,4071


In [27]:
users.head(2)

Unnamed: 0,customer_id,age_scaled,region_ohe_position,region_feat
0,2,1.0,5,1.0
1,7,1.0,6,1.0


In [28]:
print(f'Users shape was:{users.shape}, new users with features shape: {users_temp.shape}')
users = pd.concat([users, users_temp])
print(f'Users shape become: {users.shape}')

Users shape was:(22754, 4), new users with features shape: (4501, 4)
Users shape become: (27255, 4)


In [29]:
print(f'Cnt unique')
print(f'''age_scaled: {users['age_scaled'].unique().shape[0]}, \
      region_ohe_position: {users['region_ohe_position'].unique().shape[0]}, \
      region_feat: {users['region_feat'].unique().shape[0]}''')

Cnt unique
age_scaled: 1,       region_ohe_position: 6,       region_feat: 1


Hm, seems age_scaled and region_feat are unnecessary, let's drop them

Also, we have region_ohe_position, let's decode it in len(unique region_ohe_position) columns like true one-hot

In [30]:
users = users[['customer_id', 'region_ohe_position']]

In [31]:
users['region_ohe_position'].unique()

array([5, 6, 7, 4, 2, 3])

In [32]:
unique_cols = users['region_ohe_position'].unique()
for i in unique_cols:
    users[f'region_{i}'] = np.where(users['region_ohe_position']==i, 1, 0)

In [33]:
users = users.drop(['region_ohe_position'], axis=1)

In [34]:
users.head(3)

Unnamed: 0,customer_id,region_5,region_6,region_7,region_4,region_2,region_3
0,2,1,0,0,0,0,0
1,7,0,1,0,0,0,0
2,10,0,0,1,0,0,0


**Items**

In [35]:
##!!Attention: T is used to get item_is by index

train = interactions_pivot.T[~(interactions_pivot.T.index.isin(items_interact_id - items_info_id))]
#keep users_id
train_id = np.array(train.index)

train = train.to_numpy()
#add users_id to get abonents furthere
train = np.hstack([train_id.reshape(-1, 1), train])

In [36]:
test = interactions_pivot.T[(interactions_pivot.T.index.isin(items_interact_id - items_info_id))]
#keep users_id
test_id = np.array(test.index)
test = test.to_numpy()

#add users_id to get abonents furthere
test = np.hstack([test_id.reshape(-1, 1), test])

In [37]:
%%time
#let's now use the simplest version and find 1 neighbor
K_NEIGH = 1
items_neigh = get_nearest_neighbors(train, test, K_NEIGH, 'item_id')

CPU times: user 1min 18s, sys: 4.82 s, total: 1min 23s
Wall time: 1min 16s


In [38]:
print(items_neigh.shape)
items_neigh.head(3)

(6, 4)


Unnamed: 0,item_id,number_neigh,item_id_neigh,dist_neigh
0,3513.0,1,41.0,1.732051
1,10260.0,1,10629.0,3.872983
2,15387.0,1,12074.0,2.0


**Filling features for items without features**

Like for users

In [39]:
items_neigh['item_id'] = items_neigh['item_id'].astype(int)
items_neigh['item_id_neigh'] = items_neigh['item_id_neigh'].astype(int)

In [40]:
items.head(2)

Unnamed: 0,item_id,property_item,price_scaled,category_num,category_connection
0,0,0.009497,0.012911,679,1.0
2,2,0.003371,0.004131,1495,1.0


In [41]:
#get features for items without them based of ther neighbors features
items_temp = pd.merge(items, items_neigh, left_on='item_id', right_on='item_id_neigh')
items_temp = items_temp.drop(['item_id_x', 'number_neigh', 'dist_neigh', 'item_id_neigh'], axis=1)
items_temp = items_temp.rename(columns={'item_id_y': 'item_id'})
items_temp.head(2)

Unnamed: 0,property_item,price_scaled,category_num,category_connection,item_id
0,0.002849,0.003521,1817,1.0,3513
1,0.001472,0.00216,855,1.0,15391


In [42]:
print(f'items shape was:{items.shape}, new items with features shape: {items_temp.shape}')
items = pd.concat([items, items_temp])
print(f'items shape become: {items.shape}')

items shape was:(15271, 5), new items with features shape: (6, 5)
items shape become: (15277, 5)


**Continue modeling recsys**

In [43]:
print(f'users shape: {users.shape}, items shape: {items.shape}, interactions shape: {interactions.shape}')

users shape: (27255, 7), items shape: (15277, 5), interactions shape: (398636, 3)


### Lightfm

**First create users and items features**

During iterations got that these users features make model be worse

In [44]:
def build_features(df, features_names):
    ''' Build features for lightfm dataset
    Parameters:
        df - users or items dataframe
        features_names - list of feature's names
        
    Return:
        features appropriate for lightfm dataset in a "feature:value" format
    '''
    
    features = []
    columns = list(itertools.chain(*[[col]*len(df[col].unique()) for col in features_names]))
    unique_features = list(itertools.chain(*[list(df[col].unique()) for col in features_names]))
    
    for x, y in zip(columns, unique_features):
        if isinstance(y, str):
            features.append(f'{x}:{y}')
        else:
            features.append(f'{x}:{float(y)}')
    
    return features

In [45]:
print(users.columns)
print(items.columns)

Index(['customer_id', 'region_5', 'region_6', 'region_7', 'region_4',
       'region_2', 'region_3'],
      dtype='object')
Index(['item_id', 'property_item', 'price_scaled', 'category_num',
       'category_connection'],
      dtype='object')


In [46]:
user_features = build_features(users, users.columns[1:])
item_features = build_features(items, items.columns[1:])

In [47]:
print('user:', user_features[:3])
print('item:', item_features[:3])

user: ['region_5:1.0', 'region_5:0.0', 'region_6:0.0']
item: ['property_item:0.00949667616334283', 'property_item:0.003371320037986705', 'property_item:0.002991452991452991']


In [49]:
dataset = Dataset()

#from interactions users and items are used!
dataset.fit(users=interactions['customer_id'].unique(),
                    items=interactions['item_id'].unique(),
                    #user_features=user_features,
                    item_features=item_features
                   )

In [51]:
interactions_matrix, weights = dataset.build_interactions([(x[0], x[1], x[2]) for x in interactions.to_numpy()])

In [52]:
def feature_colon_value(list_values, features_names):
    '''
    Helper function that takes the user features and converts them into the proper "feature:value" format
    Parameters:
    
        list_values - list of feature's values of user or item like [1, 'Moscow', '10', ..., 'A']
        features_names - list of feature's names
        
    Return:
        list in appropriate form for lightfm dataset
        
    For example: if list_values = [1,1,0,'del'] and features_names = ['f1', 'f2', 'f3', 'loc']
    resultant output = ['f1:1', 'f2:1', 'f3:0', 'loc:del']
    '''
    result = []
    
    for x, y in zip(features_names, list_values):
        if isinstance(y, str):
            result.append(f'{x}:{y}')
        else:
            result.append(f'{x}:{float(y)}')
    
    return result

In [53]:
def build_features_list(df, features_names):
    
    '''Generate user or item features in proper format for ALL users or ALL items
    
    Parameters:
        df - users or items dataframe
        features_names - list of feature's names
        
    Return:
        list of features in appropriate format for lightfm for ALL users or items
    '''
    
    
    ad_subset = df[features_names]
    ad_list = [list(x) for x in ad_subset.to_numpy()]
    features_list = []
    for val in ad_list:
        features_list.append(feature_colon_value(val, features_names))
        
    return features_list

In [55]:
#cold items and users dropped so everything is okay to use these table
user_features_list = build_features_list(users, users.columns[1:])
item_features_list = build_features_list(items, items.columns[1:])

In [56]:
user_tuple = list(zip(interactions['customer_id'].unique(), user_features_list))
item_tuple = list(zip(interactions['item_id'].unique(), item_features_list))

In [58]:
#user_features = dataset.build_user_features(user_tuple, normalize=False)
item_features = dataset.build_item_features(item_tuple, normalize=False)

**Store mappings**

In [59]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()

### Lightfm

**Split on train and test**

Seems that adding user_features make metrics worse

In [60]:
train, test = random_train_test_split(interactions_matrix, test_percentage=0.2,
                                      random_state=np.random.RandomState(SEED))

In [79]:
#different parameters tried, these seems is ok
model = LightFM(loss='warp', 
                random_state=SEED, 
                learning_rate=0.01, 
                no_components=10,
                item_alpha=1e-6
                #user_alpha=1e-6
               )
model.fit(train,
          #user_features= user_features,
          item_features=item_features,
          epochs=30,
          num_threads=20 #fitting on more threads can cause problems in quality. 20 is from lightfm documentation
         )

<lightfm.lightfm.LightFM at 0x7f70bf417160>

In [126]:
pr = precision_at_k(model,
              test_interactions=test,
              train_interactions=train,
              num_threads=20,
              k=10,
              #user_features=user_features,
              item_features=item_features
              ).mean()

print(f'Mean precision at 10: {pr}')

Mean precision at 10: 0.007778759114444256


In [85]:
#just for interest
auc = auc_score(model,
          test_interactions=test,
          train_interactions=train,
          num_threads=20,
          #user_features=user_features,
          item_features=item_features
          ).mean()
print(f'AUC: {auc}')

AUC: 0.835292398929596


### A little more things

Here I'll predict items for warm users

In [86]:
#item_id_map is for mapping item_id from data to number in lightfm
#for example
print('from item_id to number in lightfm', list(item_id_map.keys())[0], '->',
      item_id_map[list(item_id_map.keys())[0]])

#let's create reverse mapping to get item_ids by lightfm number
item_id_map_reverse = {}
for k, v in item_id_map.items():
    item_id_map_reverse[v] = k

from item_id to number in lightfm 3568 -> 0


In [92]:
%%time
recom_top_10 = {}
for user in user_id_map.values():
    pred = model.predict(user_ids=user,
                 item_ids=np.arange(interactions_matrix.shape[1]),
                 #item_features=item_features,
                 num_threads=20)
    
    #get top 10 arg of items
    top_10 = np.argsort(pred)[:-11:-1]
    #get items_id
    l = []
    for t in top_10:
        l.append(item_id_map_reverse[t])
    
    #key is not user_id! but number in lightfm (0,1,2,..)
    #also we can create and use like user_id_map_reverse[user] to get user_id
    recom_top_10[user] = (l, list(top_10)) #(item_ids, indices)

CPU times: user 12min 28s, sys: 658 ms, total: 12min 29s
Wall time: 37.8 s


**To sum up**:
It'd be better if we tuned parameters by some frameworks (like optuna) and not by hand. Anyway we have model which can deal with new items and adding a bit better features of users will allow to deal with cold users more smarter than just recommend popular items to them. For current users we have an opportunity to get some recommendations in an easy way.

P.S. I didn't try ALS or SVD because they cannot deal with cold users and items at all. Also as I saw using ALS, lightfm or hybridSVD on the 1-st level and getting their embeddings, adding some new features for the 2-d level model like xgbranker can give great results especially in case when ranking is much more important than everything else

In [122]:
#take intersections of predicted and test values
#where get it put 1 else 0. In ideal it let test top_10 items based on test responses and order given by lightfm
binary_recom_10 = {}
test_np = test.toarray().astype(int)
for user, val in recom_top_10.items():
    ixs = np.argwhere(test_np[user, :]==1)
    common_ixs = set(val[1]) & set(ixs.flatten())
    #where there're common indices we have to change ix with 1 otherwise 0
    binary_recom_10[user] = np.where(np.isin(val[1], common_ixs), 1, 0)

**Good links**

[building-lightfm](https://towardsdatascience.com/how-i-would-explain-building-lightfm-hybrid-recommenders-to-a-5-year-old-b6ee18571309) - best

[building-lightfm](https://towardsdatascience.com/recommendation-system-in-python-lightfm-61c85010ce17)

[lightfm-vs-hybridsvd](https://www.eigentheories.com/blog/lightfm-vs-hybridsvd/) - very good, here's polar library with different recommender (lightfm is too)

[collaborative-filtering-and-hybrid-collaborative-content](https://towardsdatascience.com/recommendation-system-part-1-use-of-collaborative-filtering-and-hybrid-collaborative-content-in-6137ba64ad58)

[lightfm kaggle 2](https://www.kaggle.com/niyamatalmass/lightfm-hybrid-recommendation-system)

[lightfm kaggle 3](https://www.kaggle.com/shreyashnadage/lightfm)

[Converting Curiosity About Recommendation Systems Into Reality](https://medium.com/@bagaria.hb/converting-curiosity-about-recommendation-system-into-reality-c51937cc1684)

[avito recsys](https://habr.com/ru/company/avito/blog/439206/)

[learning to rank problem](https://everdark.github.io/k9/notebooks/ml/learning_to_rank/learning_to_rank.html)

[implicit library doc](https://implicit.readthedocs.io/en/latest/als.html)

[lightfm library doc](https://making.lyst.com/lightfm/docs/lightfm.html#lightfm.LightFM.get_item_representations)



    