In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')

# Load preprocessed data and trained model
data = pd.read_csv('data/preprocessed_data.csv')
model = tf.keras.models.load_model('data/trained_model.h5')

# Load the books dataset
books = pd.read_csv('data/BX-Books.csv', sep=';', encoding='latin-1', error_bad_lines=False)

# Get the unique user IDs and book ISBNs
user_ids = data['User-ID'].unique()
book_isbns = data['ISBN'].unique()

# Create a mapping for user IDs and book ISBNs to integers
user_to_int = {user_id: i for i, user_id in enumerate(user_ids)}
book_to_int = {isbn: i for i, isbn in enumerate(book_isbns)}

# Create a reverse mapping for integer user IDs and book ISBNs
int_to_user = {i: user_id for i, user_id in enumerate(user_ids)}
int_to_book = {i: isbn for i, isbn in enumerate(book_isbns)}

# Function to get top N recommendations for a user
def recommend_books(user_id, n=10):
    if user_id not in user_to_int:
        print(f"User ID {user_id} not found in the dataset.")
        return pd.DataFrame()

    user_int = user_to_int[user_id]
    user_vector = np.array([user_int] * len(book_isbns))
    book_vector = np.arange(len(book_isbns))

    # Predict the ratings
    predictions = model.predict([user_vector, book_vector]).flatten()

    # Get the indices of the top N predictions
    top_n_indices = predictions.argsort()[-n:][::-1]

    # Get the corresponding book ISBNs
    top_n_isbns = [int_to_book[i] for i in top_n_indices]

    # Return the top N recommended books
    recommended_books = books[books['ISBN'].apply(lambda x: x in top_n_isbns)]

    if recommended_books.empty:
        print("No recommended books found.")
    return recommended_books

# Demo: input a user ID and get book recommendations
user_id = int(input("Enter a user ID: "))
recommended_books = recommend_books(user_id)
print("\nTop 10 recommended books:")
print(recommended_books[['ISBN', 'Book-Title', 'Book-Author']])

b'Skipping line 6452: expected 8 fields, saw 9\nSkipping line 43667: expected 8 fields, saw 10\nSkipping line 51751: expected 8 fields, saw 9\n'
b'Skipping line 92038: expected 8 fields, saw 9\nSkipping line 104319: expected 8 fields, saw 9\nSkipping line 121768: expected 8 fields, saw 9\n'
b'Skipping line 144058: expected 8 fields, saw 9\nSkipping line 150789: expected 8 fields, saw 9\nSkipping line 157128: expected 8 fields, saw 9\nSkipping line 180189: expected 8 fields, saw 9\nSkipping line 185738: expected 8 fields, saw 9\n'
b'Skipping line 209388: expected 8 fields, saw 9\nSkipping line 220626: expected 8 fields, saw 9\nSkipping line 227933: expected 8 fields, saw 11\nSkipping line 228957: expected 8 fields, saw 10\nSkipping line 245933: expected 8 fields, saw 9\nSkipping line 251296: expected 8 fields, saw 9\nSkipping line 259941: expected 8 fields, saw 9\nSkipping line 261529: expected 8 fields, saw 9\n'


No recommended books found.

Top 10 recommended books:
Empty DataFrame
Columns: [ISBN, Book-Title, Book-Author]
Index: []


In [5]:
# Load the data
data = pd.read_csv('data/preprocessed_data.csv')
books = pd.read_csv('data/BX-Books.csv', sep=';', error_bad_lines=False, encoding="latin-1", warn_bad_lines=False)

# Pivot the ratings data to create a user-item matrix
user_item_matrix = data.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)

# Create a sparse matrix for more efficient calculations
sparse_matrix = csr_matrix(user_item_matrix.values)

# Calculate the cosine similarity between users
user_similarity = cosine_similarity(sparse_matrix)

def recommend_books(user_id, n=10):
    if user_id not in user_item_matrix.index:
        print(f"User ID {user_id} not found in the dataset.")
        return pd.DataFrame()

    # Find the index of the user in the user-item matrix
    user_index = user_item_matrix.index.get_loc(user_id)

    # Get the similarity scores of the user with all other users
    similarity_scores = user_similarity[user_index]

    # Calculate the weighted average of ratings based on similarity scores
    weighted_sum = np.dot(similarity_scores, user_item_matrix.values)
    sum_of_weights = np.array([np.abs(similarity_scores).sum()] * user_item_matrix.shape[1])
    predicted_ratings = weighted_sum / sum_of_weights

    # Get the indices of the top N highest predicted ratings
    top_n_indices = predicted_ratings.argsort()[-n:][::-1]

    # Get the corresponding book ISBNs
    top_n_isbns = user_item_matrix.columns[top_n_indices]

    # Return the top N recommended books
    return books[books['ISBN'].apply(lambda x: x in top_n_isbns)]

# Demo: input a user ID and get book recommendations
user_id = int(input("Enter a user ID: "))
recommended_books = recommend_books(user_id)
print("\nTop 10 recommended books:")
print(recommended_books[['ISBN', 'Book-Title', 'Book-Author']])

NameError: name 'csr_matrix' is not defined