# In [1]:
# 📚 Book Recommendation System - Full Code

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# 1. Load datasets
# All three CSV files are resolved relative to the current working directory.
# books.csv is read with low_memory=False (whole-file type inference);
# the other two use the pandas defaults.
books = pd.read_csv('books.csv', low_memory=False)
users, ratings = (pd.read_csv(csv_name) for csv_name in ('users.csv', 'ratings.csv'))

# 2. Merge ratings with book information
# Default (inner) join on ISBN: each rating row gains the matching book's
# columns, and ratings whose ISBN is absent from `books` are dropped.
ratings_with_name = pd.merge(ratings, books, on='ISBN')

# 3. Create popularity-based recommender
# Group the rating column by title once and derive both statistics from it.
ratings_by_title = ratings_with_name.groupby('Book-Title')['Book-Rating']

# Number of (non-null) ratings per title.
num_rating_df = (
    ratings_by_title.count()
    .reset_index()
    .rename(columns={'Book-Rating': 'num_ratings'})
)

# Mean rating per title.
avg_rating_df = (
    ratings_by_title.mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_rating'})
)

# Keep titles with at least 250 ratings, then take the 50 best by mean rating.
title_stats = num_rating_df.merge(avg_rating_df, on='Book-Title')
frequently_rated = title_stats[title_stats['num_ratings'] >= 250]
top_rated = frequently_rated.sort_values('avg_rating', ascending=False).head(50)

# Attach book metadata; a title can appear under several rows of `books`,
# so only the first matching row per title is kept.
popular_df = top_rated.merge(books, on='Book-Title').drop_duplicates('Book-Title')
popular_df = popular_df[['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]

# 4. Collaborative Filtering Recommender
# Step 1: restrict to users who have rated more than 200 books.
ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
x = ratings_per_user > 200
active_users = ratings_per_user[x].index
from_active_user = ratings_with_name['User-ID'].isin(active_users)
filtered_rating = ratings_with_name[from_active_user]

# Step 2: among those users' ratings, restrict to titles rated at least 50 times.
ratings_per_title = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
y = ratings_per_title >= 50
famous_books = ratings_per_title[y].index
of_famous_book = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating[of_famous_book]

# 5. Create pivot table and similarity matrix
# One row per book title, one column per user ID; each cell holds that user's
# rating for the book (pivot_table's default aggregation is the mean if a
# pair occurs more than once). User/book pairs with no rating become 0.
pt = pd.pivot_table(
    final_ratings,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
).fillna(0)

# Square matrix of cosine similarities between the rows (books) of `pt`;
# row/column i corresponds to pt.index[i].
similarity_scores = cosine_similarity(pt)

# 6. Recommendation function
def recommend(book_name, top_n=4):
    """Return the books whose similarity score to *book_name* is highest.

    The title is located by position in the module-level ``pt.index``; that
    position selects a row of the module-level ``similarity_scores`` matrix,
    and the best-scoring other positions are mapped back to titles and
    looked up in the module-level ``books`` table.

    Args:
        book_name: Title to look up. Must equal an entry of ``pt.index``.
        top_n: Maximum number of recommendations to return. Defaults to 4,
            the number this function has always returned.

    Returns:
        A list with one entry per recommended book, most similar first.
        Each entry is a list ``[title, author, image_url]`` built from the
        first row of ``books`` carrying that title (an empty list if
        ``books`` has no such row).

    Raises:
        IndexError: If *book_name* is not present in ``pt.index``.
        ValueError: If *top_n* is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # Same exception type the bare ``[0][0]`` lookup used to raise, but
        # with a message that names the missing title.
        raise IndexError(f"{book_name!r} is not a title in the similarity matrix")
    index = int(matches[0])

    # Exclude the queried book by position instead of assuming it sorts first.
    # With tied scores (or an all-zero rating row, whose self-similarity is
    # not the maximum) slicing off element 0 could drop a real neighbour and
    # recommend the book to itself.
    candidates = (
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    )
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    data = []
    for position, _score in similar_items:
        title = pt.index[position]
        # Every selected row shares the same title, so drop_duplicates leaves
        # at most one row; compute it once and read all three fields from it.
        book_row = books[books['Book-Title'] == title].drop_duplicates('Book-Title')
        item = []
        for column in ('Book-Title', 'Book-Author', 'Image-URL-M'):
            item.extend(book_row[column].values)
        data.append(item)
    return data

# 7. Test the recommender
# Smoke test: query one title and display the returned recommendation list.
sample_recommendations = recommend('1984')
print(sample_recommendations)

# 8. Save for deployment
# Each artifact is pickled to its own file in the current working directory.
# The file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes, rather than being left open until garbage
# collection.
for artifact, filename in (
    (popular_df, 'popular.pkl'),
    (pt, 'pt.pkl'),
    (books, 'books.pkl'),
    (similarity_scores, 'similarity_scores.pkl'),
):
    with open(filename, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)


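# Output of the cell above:
# FileNotFoundError: [Errno 2] No such file or directory: 'books.csv'
# (books.csv, users.csv and ratings.csv are read from the current working
# directory, so they must be present there before the cell is run.)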