In [1]:
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset
import numpy as np

# Load only the columns the recommender needs.
# 'isbn' is parsed as text at read time: if pandas is left to infer the type,
# ISBNs made up solely of digits can be read as integers and lose their leading
# zeros before the later .astype(str) runs, so they would no longer match the
# real identifier.
all_cleaned = pd.read_csv(
    '../data/all_cleaned.csv',
    usecols=['user_id', 'isbn', 'book_rating', 'mod_book_title', 'mod_book_author'],
    dtype={'isbn': str},
)

# LightFM's Dataset is fed string identifiers below, so normalise both key columns.
# 'user_id' is intentionally still inferred and then stringified: downstream cells
# look users up by the resulting text form (e.g. '208406.0').
all_cleaned['user_id'] = all_cleaned['user_id'].astype(str)
# Missing ISBNs (NaN) become the literal string 'nan', as before.
all_cleaned['isbn'] = all_cleaned['isbn'].astype(str)




In [3]:
# Lookup table: ISBN -> {'mod_book_title': ..., 'mod_book_author': ...}.
# When an ISBN occurs on several rows, only its first row is kept.
isbn_info = (
    all_cleaned
    .drop_duplicates(subset=['isbn'])
    .set_index('isbn')
    .loc[:, ['mod_book_title', 'mod_book_author']]
    .to_dict(orient='index')
)


In [4]:
# Register every distinct user and ISBN with a fresh LightFM Dataset so that
# each one is assigned an internal integer index.
dataset = Dataset()
dataset.fit(
    users=all_cleaned['user_id'].unique(),
    items=all_cleaned['isbn'].unique(),
)

# External id -> internal index mappings (the feature mappings are not needed here).
user_mapping, _, item_mapping, _ = dataset.mapping()

# Turn every (user_id, isbn, book_rating) row into an interaction triple and
# build the interaction matrix together with the matching weight matrix.
(interactions, weights) = dataset.build_interactions(
    list(
        all_cleaned[['user_id', 'isbn', 'book_rating']]
        .itertuples(index=False, name=None)
    )
)


In [5]:
# Define the LightFM model with the logistic loss.
# random_state pins the model's random initialisation so a re-run starts from the
# same state (with num_threads > 1 the fit may still not be bit-for-bit identical).
# NOTE(review): only `interactions` is passed to fit(); the `weights` matrix built
# from book_rating is not used, so the rating values do not influence training.
# NOTE(review): the logistic loss is documented for data with both positive and
# negative interactions - confirm it is the intended choice for this matrix.
model = LightFM(loss='logistic', random_state=42)

# Train the model on the interactions matrix (fit returns the model itself,
# which is what the cell displays).
model.fit(interactions, epochs=30, num_threads=2)


<lightfm.lightfm.LightFM at 0x3042b6490>

In [6]:
def recommend_books(model, interactions, user_id, user_mapping, item_mapping, isbn_info,
                    num_recommendations=5, exclude_known=False):
    """Return the top-scoring books for one user.

    Parameters
    ----------
    model : object with ``predict(user_idx, item_indices)``
        Trained model used to score every item for the user.
    interactions : sparse matrix of shape (n_users, n_items)
        Its second dimension defines how many items are scored. When
        ``exclude_known`` is true it is also read (via ``tocsr()``) to find the
        items that already have a stored entry in the user's row.
    user_id : any
        External user identifier; converted with ``str()`` before lookup.
    user_mapping : dict
        External user id (str) -> internal user index.
    item_mapping : dict
        ISBN -> internal item index.
    isbn_info : dict
        ISBN -> dict with 'mod_book_title' and 'mod_book_author'.
    num_recommendations : int or None, default 5
        Maximum number of books to return; ``None`` returns every ranked item.
    exclude_known : bool, default False
        If true, items the user already has an interaction entry for are
        dropped from the ranking before the top entries are taken.

    Returns
    -------
    list of dict
        One ``{'isbn', 'title', 'author'}`` dict per recommendation, best first.
        Books missing from ``isbn_info`` get 'Unknown Title' / 'Unknown Author'.

    Raises
    ------
    ValueError
        If the user is unknown, ``num_recommendations`` is negative, or a ranked
        item index has no ISBN in ``item_mapping``.
    """
    # Normalise the identifier so ints and strings address the same user.
    user_id = str(user_id)

    if user_id not in user_mapping:
        raise ValueError(f"User ID {user_id} is not found in the dataset.")

    # A negative count would silently slice from the end of the ranking.
    if num_recommendations is not None and num_recommendations < 0:
        raise ValueError("num_recommendations must be non-negative.")

    user_idx = user_mapping[user_id]

    # Score every item column for this user and rank from best to worst.
    scores = model.predict(user_idx, np.arange(interactions.shape[1]))
    ranked_items = np.argsort(-scores)

    if exclude_known:
        # Column indices stored in the user's CSR row are the items already seen.
        csr = interactions.tocsr()
        known_items = csr.indices[csr.indptr[user_idx]:csr.indptr[user_idx + 1]]
        ranked_items = ranked_items[np.isin(ranked_items, known_items, invert=True)]

    top_items = ranked_items[:num_recommendations]

    # Invert the ISBN -> index mapping once instead of scanning it per item.
    index_to_isbn = {idx: isbn for isbn, idx in item_mapping.items()}
    unknown_book = {'mod_book_title': 'Unknown Title', 'mod_book_author': 'Unknown Author'}

    recommended_books = []
    for item_idx in top_items:
        item_idx = int(item_idx)
        if item_idx not in index_to_isbn:
            raise ValueError(f"Item index {item_idx} has no ISBN in the item mapping.")
        isbn = index_to_isbn[item_idx]
        book_info = isbn_info.get(isbn, unknown_book)
        recommended_books.append({
            'isbn': isbn,
            'title': book_info['mod_book_title'],
            'author': book_info['mod_book_author'],
        })

    return recommended_books


In [7]:
# Reader to generate suggestions for - swap in any user_id present in the data.
# The value is already a string, and recommend_books applies str() itself.
user_id_to_recommend = '208406.0'

try:
    recommended_books = recommend_books(
        model,
        interactions,
        user_id_to_recommend,
        user_mapping,
        item_mapping,
        isbn_info,
    )
except ValueError as err:
    # recommend_books raises ValueError for a user missing from the mapping; report it.
    print(err)
else:
    # One line per suggested book: ISBN, title and author.
    for book in recommended_books:
        print(f"ISBN: {book['isbn']}, Title: {book['title']}, Author: {book['author']}")


ISBN: 044023722X, Title: a painted house, Author: john grisham
ISBN: 067976402X, Title: snow falling on cedars, Author: david guterson
ISBN: 059035342X, Title: harry potter and the sorcerers stone harry potter paperback, Author: j k rowling
ISBN: 044021145X, Title: the firm, Author: john grisham
ISBN: 1400034779, Title: the no 1 ladies detective agency today show book club 8, Author: alexander mccall smith
