In [2]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.data import Dataset



In [11]:
# Load the dataset
all_cleaned = pd.read_csv('../data/all_cleaned.csv', usecols=['user_id', 'isbn', 'book_rating'])

#### Creating the interaction matrix ####

In [12]:

# Initialize the Dataset object
dataset = Dataset()

# Fit the dataset to user_ids and item_ids (isbn)
dataset.fit((x for x in all_cleaned['user_id']),
            (x for x in all_cleaned['isbn']))

# Build the interactions matrix
(interactions, weights) = dataset.build_interactions(
    [(row['user_id'], row['isbn'], row['book_rating']) for idx, row in all_cleaned.iterrows()]) # this interaction matrix can now be passed to lightfm


#### Splitting the data ####

In [13]:
from lightfm.cross_validation import random_train_test_split

train_interactions, test_interactions = random_train_test_split(interactions, test_percentage=0.2)


#### Training the model ####

In [14]:
from lightfm import LightFM

model = LightFM(loss='warp')  # WARP is a good starting point for ranking tasks
model.fit(train_interactions, epochs=30, num_threads=4)


<lightfm.lightfm.LightFM at 0x17f4b7490>

 #### Using precision at k and AUC to evaluate the model performance ####

In [15]:
from lightfm.evaluation import precision_at_k, auc_score

# Precision@k
train_precision = precision_at_k(model, train_interactions, k=10).mean()
test_precision = precision_at_k(model, test_interactions, k=10).mean()

# AUC (Area Under the Curve)
train_auc = auc_score(model, train_interactions).mean()
test_auc = auc_score(model, test_interactions).mean()

print(f'Train precision@10: {train_precision}, Test precision@10: {test_precision}')
print(f'Train AUC: {train_auc}, Test AUC: {test_auc}')


Train precision@10: 0.021346142515540123, Test precision@10: 0.006430582143366337
Train AUC: 0.9888064861297607, Test AUC: 0.7072576284408569


#### hyper parameter tuning ####

In [16]:
from itertools import product

losses = ['warp', 'bpr']
learning_rates = [0.01, 0.05, 0.1]
no_components = [10, 30, 50]

# Keep track of the best score
best_precision = 0
best_params = {}

for loss, lr, components in product(losses, learning_rates, no_components):
    model = LightFM(loss=loss, learning_rate=lr, no_components=components)
    model.fit(train_interactions, epochs=30, num_threads=4)
    
    test_precision = precision_at_k(model, test_interactions, k=10).mean()
    
    if test_precision > best_precision:
        best_precision = test_precision
        best_params = {'loss': loss, 'learning_rate': lr, 'no_components': components}

print(f'Best Precision@10: {best_precision}')
print(f'Best Parameters: {best_params}')


Best Precision@10: 0.007415913976728916
Best Parameters: {'loss': 'warp', 'learning_rate': 0.01, 'no_components': 30}
