In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [5]:
# Load the three raw tables. low_memory=False turns off chunked parsing so
# each column's dtype is inferred from the whole file (avoids mixed-type columns).
books, users, ratings = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('Books.csv', 'users.csv', 'ratings.csv')
)

In [7]:
# Attach book metadata to every rating row. This is an inner join on ISBN, so
# ratings whose ISBN has no entry in `books` are dropped.
ratings_with_name = pd.merge(ratings, books, on='ISBN')

# Popularity-Based Recommender

In [10]:
# Number of ratings each title received, as a two-column frame:
# 'Book-Title', 'num_ratings'.
# 'Book-Rating' is selected *before* aggregating so only that column is
# counted (the previous form counted every column of the merged frame and then
# threw all but one away). This also matches how the average is computed.
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)

In [12]:
# Mean rating per title, as a two-column frame: 'Book-Title', 'avg_rating'.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)

In [14]:
# Combine both per-title statistics into one frame (inner join on the title).
popular_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')

In [16]:
# Keep only titles with 250 or more ratings, rank them by mean rating
# (highest first), and retain the top 50.
popular_df = (
    popular_df
    .loc[lambda frame: frame['num_ratings'] >= 250]
    .sort_values(by='avg_rating', ascending=False)
    .iloc[:50]
)

In [18]:
# Attach author and cover-image URL to each popular title.
#
# Both sides are narrowed to the columns actually needed before merging:
#   * the left side keeps only its own statistics, so re-running this cell does
#     not collide with the 'Book-Author' / 'Image-URL-M' columns added by a
#     previous run (which would produce *_x / *_y columns and a KeyError);
#   * the right side drops the unused `books` columns, keeping the merge small.
# A title can appear in `books` more than once; drop_duplicates keeps the
# first merged row for each title.
popular_df = (
    popular_df[['Book-Title', 'num_ratings', 'avg_rating']]
    .merge(books[['Book-Title', 'Book-Author', 'Image-URL-M']], on='Book-Title')
    .drop_duplicates('Book-Title')
    [['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]
)

# Collaborative Filtering-Based Recommender

In [21]:
# Select users with more than 200 ratings in the merged frame.
# 'Book-Rating' is selected before counting so only that column is aggregated.
# `x` is a boolean Series indexed by User-ID; x[x] keeps the True entries, so
# its index is the set of qualifying user ids.
# ("padhe likhe" is Hindi, roughly "well-read".)
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > 200
padhe_likhe_users = x[x].index

In [23]:
# Restrict the ratings to the heavy raters selected above.
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(padhe_likhe_users)]

# Among those ratings, keep titles that received at least 50 of them.
# 'Book-Rating' is selected before counting so only that column is aggregated.
# `y` is a boolean Series indexed by title; y[y].index is the qualifying titles.
y = filtered_rating.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = y[y].index

In [25]:
# Final ratings dataset: rows from heavy raters whose title is in famous_books.
final_ratings = filtered_rating.loc[
    lambda frame: frame['Book-Title'].isin(famous_books)
]

In [27]:
# Title x user matrix of ratings: one row per book, one column per user.
# pivot_table averages duplicate (title, user) pairs by default; pairs with no
# rating come out as NaN and are replaced with 0.
pt = (
    final_ratings
    .pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')
    .fillna(0)
)

In [33]:
# Calculate cosine similarity between every pair of rows of `pt`.
# Rows of `pt` are book titles, so similarity_scores[i][j] is the similarity
# between the rating vectors of book i and book j (positions follow pt.index).
similarity_scores = cosine_similarity(pt)

In [39]:
# Sanity check: the matrix should be square, with one row/column per book in `pt`.
similarity_scores.shape


(706, 706)

In [51]:
# Recommendation function
def recommend(book_name, top_n=4):
    """Return the books most similar to the one matching ``book_name``.

    The query is matched case-insensitively against the titles in
    ``pt.index``. An exact title match is preferred; otherwise the first title
    that contains the query as a substring is used.

    Args:
        book_name: Full or partial book title. ``None``, an empty string, or a
            whitespace-only string is treated as "no match".
        top_n: Maximum number of recommendations to return (default 4).

    Returns:
        A list with one ``[title, author, image_url]`` list per recommended
        book, ordered from most to least similar. When nothing matches, a list
        holding a single one-element list with an explanatory message.
    """
    no_match = [["No books found matching your search. Please try another title."]]

    query = (book_name or "").lower().strip()
    if not query:
        # An empty string is a substring of every title, so without this guard
        # a blank search would silently resolve to the first book in pt.index.
        return no_match

    titles = list(pt.index)
    lowered = [title.lower() for title in titles]
    if query in lowered:
        # Prefer the exact title over an earlier title that merely contains it.
        index = lowered.index(query)
    else:
        index = next(
            (pos for pos, title in enumerate(lowered) if query in title), None
        )
        if index is None:
            return no_match

    # Rank every *other* book by similarity. The query book is removed by
    # position rather than by assuming it sorts first: a tied score (or an
    # all-zero rating row) could otherwise make the book recommend itself.
    # list.sort is stable, so ties keep their pt.index order.
    candidates = [
        (pos, score)
        for pos, score in enumerate(similarity_scores[index])
        if pos != index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    data = []
    for pos, _score in candidates[:max(top_n, 0)]:
        # `books` may list a title several times; keep its first row.
        details = books[books['Book-Title'] == titles[pos]].drop_duplicates('Book-Title')
        data.append([
            *details['Book-Title'].values,
            *details['Book-Author'].values,
            *details['Image-URL-M'].values,
        ])

    return data

In [53]:
# Save data for Flask app
# Each file is opened in a `with` block so its handle is flushed and closed as
# soon as the object has been written, instead of being left open until
# garbage collection.
artifacts = {
    'popular.pkl': popular_df,
    'pt.pkl': pt,
    'books.pkl': books,
    'similarity_scores.pkl': similarity_scores,
}
for pickle_path, artifact in artifacts.items():
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)