In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.data import Dataset



In [2]:
# Load only the columns needed to build the user-item interaction matrix.
# `isbn` is read as text: without an explicit dtype pandas may infer all-digit
# ISBNs as integers, which silently drops leading zeros before the later
# string conversion (e.g. '0440234743' would become '440234743').
all_cleaned = pd.read_csv(
    '../data/all_cleaned.csv',
    usecols=['user_id', 'isbn', 'book_rating'],
    dtype={'isbn': str},
)

In [3]:
# Cast both identifier columns to str so user ids and ISBNs are handled as
# plain strings when they are registered with LightFM.
for id_column in ('user_id', 'isbn'):
    all_cleaned[id_column] = all_cleaned[id_column].astype(str)

In [4]:
# Register every distinct user id and ISBN with a fresh LightFM Dataset.
dataset = Dataset()
unique_users = all_cleaned['user_id'].unique()
unique_items = all_cleaned['isbn'].unique()
dataset.fit(users=unique_users, items=unique_items)

# Turn each row into a (user_id, isbn, book_rating) triple and build the two
# matrices returned by build_interactions: `interactions` and `weights`.
rating_columns = ['user_id', 'isbn', 'book_rating']
rating_triples = list(map(tuple, all_cleaned[rating_columns].values))
interactions, weights = dataset.build_interactions(rating_triples)

# mapping() returns four dictionaries; only the first (user ids) and the
# third (item ids) are needed to translate between ids and matrix indices.
id_mappings = dataset.mapping()
user_mapping = id_mappings[0]
item_mapping = id_mappings[2]


In [5]:
# Fixed seed so the model's random initialisation is repeatable across
# Restart & Run All. NOTE(review): training with num_threads > 1 may still
# introduce small run-to-run differences - confirm if exact reproducibility
# is required.
RANDOM_SEED = 42

# LightFM model trained with the logistic loss.
model = LightFM(loss='logistic', random_state=RANDOM_SEED)

# Train on `interactions` (the first matrix returned by build_interactions).
# The separate `weights` matrix holding the ratings is not passed to fit here.
model.fit(interactions, epochs=30, num_threads=2)


<lightfm.lightfm.LightFM at 0x127438290>

In [6]:
def recommend_books(model, interactions, user_id, user_mapping, item_mapping, num_recommendations=5):
    """Return the ISBNs with the highest predicted scores for one user.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_index, item_indices)`` that
        returns one score per requested item index.
    interactions : matrix-like
        User-item matrix; only ``interactions.shape[1]`` (the number of
        items) is read.
    user_id : Any
        External user identifier; it is converted with ``str()`` before the
        lookup in ``user_mapping``.
    user_mapping : dict
        Maps external user ids (strings) to internal user indices.
    item_mapping : dict
        Maps ISBNs to internal item indices.
    num_recommendations : int, default 5
        Maximum number of ISBNs to return. Values <= 0 yield an empty list.

    Returns
    -------
    list
        ISBNs ordered from the highest to the lowest predicted score.

    Raises
    ------
    ValueError
        If ``user_id`` is not a key of ``user_mapping``, or if a top-scoring
        item index has no entry in ``item_mapping``.
    """
    # Mapping keys are strings, so normalise the id before the lookup.
    user_id = str(user_id)
    if user_id not in user_mapping:
        raise ValueError(f"User ID {user_id} is not found in the dataset.")

    # A negative count would otherwise slice "all but the last n" items.
    if num_recommendations <= 0:
        return []

    user_idx = user_mapping[user_id]

    # Score every item column for this user, then keep the best-scoring ones.
    scores = model.predict(user_idx, np.arange(interactions.shape[1]))
    top_items = np.argsort(-scores)[:num_recommendations]

    # Invert the ISBN -> index mapping once instead of scanning it per item.
    # setdefault keeps the first ISBN seen for an index, matching a
    # first-match linear search.
    index_to_isbn = {}
    for isbn, index in item_mapping.items():
        index_to_isbn.setdefault(index, isbn)

    try:
        return [index_to_isbn[int(item)] for item in top_items]
    except KeyError as exc:
        # Keep ValueError as the error type callers handle for bad lookups.
        raise ValueError(f"Item index {exc.args[0]} has no ISBN in the item mapping.") from exc


In [9]:
# Target user for the demo; replace with a user_id that exists in your dataset.
# The literal is already a string, so no further conversion is needed.
user_id_to_recommend = '16634.0'

try:
    # recommend_books raises ValueError when the user id is unknown.
    recommended_books = recommend_books(
        model,
        interactions,
        user_id_to_recommend,
        user_mapping,
        item_mapping,
    )
    print(f"Recommended ISBNs for user {user_id_to_recommend}: {recommended_books}")
except ValueError as lookup_error:
    # Report the problem instead of failing the cell.
    print(lookup_error)


Recommended ISBNs for user 16634.0: ['044023722X', '067976402X', '059035342X', '044021145X', '1400034779']
