In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD

# Load Dataset
# Both files are ';'-separated and Latin-1 encoded; rows the parser cannot
# handle are skipped instead of aborting the load.
books = pd.read_csv(
    "Books.csv",
    sep=";",
    encoding="latin-1",
    on_bad_lines="skip",
    low_memory=False,
)
ratings = pd.read_csv(
    "Ratings.csv",
    sep=";",
    encoding="latin-1",
    on_bad_lines="skip",
    low_memory=False,
)

# Trim leading/trailing whitespace from every header so later column
# lookups by name do not depend on stray spaces in the CSV header row.
books.columns = books.columns.map(str.strip)
ratings.columns = ratings.columns.map(str.strip)

# Quick sanity check of what was loaded
print("Books shape:", books.shape)
print("Ratings shape:", ratings.shape)

# Filter Dataset
# Step 1: restrict to users who submitted at least 50 ratings.
ratings_per_user = ratings['User-ID'].value_counts()
active_users = ratings_per_user.loc[ratings_per_user.ge(50)].index
ratings = ratings.loc[ratings['User-ID'].isin(active_users)]

# Step 2: among those users' ratings, restrict to books that received at
# least 100 ratings. The count is taken AFTER step 1, so it only reflects
# ratings made by active users.
ratings_per_book = ratings['ISBN'].value_counts()
popular_books = ratings_per_book.loc[ratings_per_book.ge(100)].index
ratings = ratings.loc[ratings['ISBN'].isin(popular_books)]

print("Filtered Ratings shape:", ratings.shape)

# Create User-Item Matrix
# One row per user, one column per ISBN. pivot_table's default aggregation
# (mean) collapses repeated (user, book) pairs; pairs with no rating at all
# come out as NaN and are replaced by 0 afterwards.
user_item_matrix = pd.pivot_table(
    ratings,
    values='Rating',
    index='User-ID',
    columns='ISBN',
).fillna(0)

print("User-Item Matrix shape:", user_item_matrix.shape)

# Matrix Factorization (SVD)
# Project every user onto 20 latent components; random_state is fixed so the
# factorization is repeatable between runs.
svd = TruncatedSVD(n_components=20, random_state=42)
matrix = svd.fit_transform(user_item_matrix)

# Multiply the user factors by the component matrix to get a dense
# approximation of the user-item matrix, then restore the user / ISBN labels
# so predictions can be looked up by id.
approx_ratings = matrix @ svd.components_
pred_ratings = pd.DataFrame(
    data=approx_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

# Recommendation Function
def recommend_books(user_id, num_recommendations=5, predictions=None,
                    ratings_df=None, books_df=None):
    """Return the top predicted books for a user, best prediction first.

    Parameters
    ----------
    user_id
        Row label of the user in the predicted-ratings frame.
    num_recommendations : int, default 5
        Maximum number of books to return.
    predictions : pandas.DataFrame, optional
        Predicted scores indexed by user with one column per ISBN.
        Defaults to the module-level ``pred_ratings``.
    ratings_df : pandas.DataFrame, optional
        Ratings table with ``'User-ID'`` and ``'ISBN'`` columns, used to
        exclude books the user has already rated. Defaults to the
        module-level ``ratings``.
    books_df : pandas.DataFrame, optional
        Book catalogue with ``'ISBN'`` and ``'Title'`` columns. Defaults to
        the module-level ``books``.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns ``['ISBN', 'Title']`` ordered from highest to
        lowest predicted score (catalogue row labels are kept as the index),
        or the string ``"User not found!"`` when ``user_id`` is not a row of
        the predictions frame. ISBNs missing from the catalogue are omitted,
        so fewer than ``num_recommendations`` rows may be returned.
    """
    # Fall back to the notebook-level frames when none are supplied.
    predictions = pred_ratings if predictions is None else predictions
    ratings_df = ratings if ratings_df is None else ratings_df
    books_df = books if books_df is None else books_df

    if user_id not in predictions.index:
        return "User not found!"

    # Rank every book for this user by predicted score, best first.
    user_scores = predictions.loc[user_id].sort_values(ascending=False)

    # Exclude books already rated; errors="ignore" tolerates rated ISBNs that
    # are not columns of the predictions frame.
    rated_books = ratings_df.loc[ratings_df['User-ID'] == user_id, 'ISBN'].tolist()
    top = user_scores.drop(rated_books, errors="ignore").head(num_recommendations)

    # Look the titles up in the catalogue. A plain isin() filter returns rows
    # in catalogue order and repeats an ISBN that is listed more than once, so
    # de-duplicate and then re-sort by each ISBN's position in the ranking.
    rank = {isbn: position for position, isbn in enumerate(top.index)}
    matched = books_df.loc[books_df['ISBN'].isin(top.index), ['ISBN', 'Title']]
    matched = matched.drop_duplicates(subset='ISBN')
    return matched.sort_values('ISBN', key=lambda isbns: isbns.map(rank))

# Demo: use the first user id present in the filtered user-item matrix,
# which is guaranteed to have a row of predictions.
sample_user = user_item_matrix.index[0]
print(f"\nRecommendations for User {sample_user}:")
print(recommend_books(sample_user, num_recommendations=5))


Books shape: (271379, 5)
Ratings shape: (1149780, 3)
Filtered Ratings shape: (48139, 3)
User-Item Matrix shape: (3060, 306)

Recommendations for User 243:
            ISBN                                              Title
356   0142001740                            The Secret Life of Bees
456   044021145X                                           The Firm
563   0156027321                                         Life of Pi
706   0446672211  Where the Heart Is (Oprah's Book Club (Paperba...
1922  067976402X                             Snow Falling on Cedars
