In [3]:
import numpy as np
import ast
import pandas as pd

In [4]:
# Load the GoodReads CSV (named "cleaned" by its file name); the path is
# relative, so it resolves against the notebook's working directory.
df = pd.read_csv("../Data/GoodReads_100k_books_cleaned.csv")

In [5]:
# Print column names, non-null counts and dtypes as a quick schema check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70687 entries, 0 to 70686
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   author        70687 non-null  object 
 1   bookformat    70687 non-null  object 
 2   desc          70687 non-null  object 
 3   genre         70687 non-null  object 
 4   img           70687 non-null  object 
 5   isbn          70687 non-null  object 
 6   rating        70687 non-null  float64
 7   reviews       70687 non-null  int64  
 8   title         70687 non-null  object 
 9   totalratings  70687 non-null  int64  
 10  genre_list    70687 non-null  object 
dtypes: float64(1), int64(2), object(8)
memory usage: 5.9+ MB


In [None]:
def isbn_list_to_book_details(isbn_list, df):
    """
    Retrieve book details for a list of ISBNs.

    Parameters:
    isbn_list (list or str): ISBN values to look up. A single ISBN given as a
        bare string is treated as a one-element list (iterating a string would
        otherwise look up each character separately).
    df (DataFrame): The dataset containing book details. It must have an
        'isbn' column; any other detail column that is absent is reported as
        None instead of raising KeyError.

    Returns:
    list: One entry per requested ISBN, in request order. A found ISBN yields
        a dict with the keys 'title', 'author', 'genre', 'rating',
        'totalratings', 'isbn', 'pages', 'img' (values taken from the first
        matching row). An ISBN that is not found yields the string
        "Book with ISBN <isbn> not found.".
    """
    # Key order matches the order the details were historically emitted in.
    detail_fields = (
        'title', 'author', 'genre', 'rating',
        'totalratings', 'isbn', 'pages', 'img',
    )
    # Resolve column availability once rather than per ISBN. Not every version
    # of the dataset carries every column (e.g. 'pages'), so missing ones map
    # to None to keep the dict schema stable for callers.
    present = {field: field in df.columns for field in detail_fields}

    if isinstance(isbn_list, str):
        isbn_list = [isbn_list]

    books_details = []

    for isbn in isbn_list:
        book = df[df['isbn'] == isbn]

        if book.empty:
            books_details.append(f"Book with ISBN {isbn} not found.")
            continue

        # If the ISBN appears on several rows, the first one wins.
        books_details.append({
            field: (book[field].values[0] if present[field] else None)
            for field in detail_fields
        })

    return books_details

In [None]:
class PopularityRecommender:
    """
    Popularity-based recommender backed by a precomputed genre -> rows index.

    Each book gets ``popularity_score = rating * log1p(totalratings)``; queries
    return the ISBNs with the highest scores, optionally restricted by genre.

    Usage:
        rec = PopularityRecommender(df, min_ratings=10)
        rec.get_popular_isbns()  # overall top
        rec.get_popular_isbns(genre='History', top=10)
        rec.get_popular_isbns(genre=['History','Civil War'], top=5, match_mode='all')

    Attributes:
        df: Filtered copy of the input frame with parsed ``genre_list`` and an
            added ``popularity_score`` column. Original index labels are kept.
        genre_index: dict mapping a genre name to the set of ``df`` index
            labels of the books tagged with it.
    """
    def __init__(self, df, min_ratings=10):
        """
        Build the scored frame and the genre index.

        Parameters:
        - df: DataFrame with at least 'isbn', 'rating', 'totalratings' and
          'genre_list' columns. It is not modified.
        - min_ratings: books are kept only when ``totalratings`` is strictly
          greater than this value.
        """
        # Filter first so only surviving rows are copied and parsed; the
        # explicit copy makes the column assignments below unambiguous.
        frame = df.loc[df['totalratings'] > min_ratings].copy()
        frame['genre_list'] = frame['genre_list'].apply(self._parse_genre_list)
        frame['popularity_score'] = frame['rating'] * np.log1p(frame['totalratings'])
        self.df = frame

        # Index labels (not positions) are stored, so the index stays valid
        # for the filtered frame without resetting its index.
        self.genre_index = {}
        for idx, genres in frame['genre_list'].items():
            for g in genres:
                self.genre_index.setdefault(g, set()).add(idx)

    def _parse_genre_list(self, val):
        """
        Safely parse the genre_list field, handling malformed strings.

        Lists are returned as-is and non-string values become ``[]``. Strings
        are first parsed as a Python literal; if that fails or does not yield
        a list, the string is split on commas after stripping brackets and
        quotes, and items containing '...' are dropped.
        """
        if isinstance(val, list):
            return val
        if not isinstance(val, str):
            return []
        try:
            parsed = ast.literal_eval(val)
            if isinstance(parsed, list):
                return parsed
        except Exception:
            # Deliberate best effort: any literal that cannot be parsed falls
            # through to the manual comma split below.
            pass
        s = val.strip().lstrip('[').rstrip(']')
        items = []
        for part in s.split(','):
            item = part.strip().strip("'\" ")
            if item and '...' not in item:
                items.append(item)
        return items

    def get_popular_isbns(self, genre=None, top=20, match_mode='all'):
        """
        Retrieve top ISBNs by popularity, optionally filtered by genre(s).

        Parameters:
        - genre: None (no filter), a single genre name, or an iterable of names
        - top: maximum number of ISBNs to return
        - match_mode: 'any' (book has at least one of the genres) or
          'all' (book has every genre); only consulted when ``genre`` is given

        Returns:
        - List of ISBN values ordered by descending popularity score, or None
          if no book matches.

        Raises:
        - ValueError: if ``genre`` is given and ``match_mode`` is neither
          'any' nor 'all'.
        """
        if genre is None:
            # No filter: rank the frame directly instead of re-selecting
            # every row through a list of labels.
            sub = self.df
        else:
            genres = [genre] if isinstance(genre, str) else list(genre)
            sets = [self.genre_index.get(g, set()) for g in genres]
            if match_mode == 'any':
                candidates = set().union(*sets)
            elif match_mode == 'all':
                candidates = set.intersection(*sets) if sets else set()
            else:
                raise ValueError("match_mode must be 'any' or 'all'")

            if not candidates:
                return None

            # A boolean mask keeps rows in frame order, so score ties are
            # broken the same way as in the unfiltered path rather than by
            # set iteration order; it also selects each row exactly once.
            sub = self.df[self.df.index.isin(candidates)]

        if sub.empty:
            return None

        top_df = sub.nlargest(top, 'popularity_score')
        return top_df['isbn'].tolist()


In [11]:
# Build the recommender from the loaded frame, then print a few sample
# queries: overall, single genre, and two genres combined with OR / AND.
recommender = PopularityRecommender(df, min_ratings=10)

print(
    'Top 20 popular books (ISBNs):',
    recommender.get_popular_isbns(),
)
print(
    'Top 10 History books (ISBNs):',
    recommender.get_popular_isbns(genre='History', top=10),
)
print(
    'Top 5 History OR Civil War (ISBNs):',
    recommender.get_popular_isbns(
        genre=['History', 'Civil War'], top=5, match_mode='any'
    ),
)
print(
    'Top 5 History AND Civil War (ISBNs):',
    recommender.get_popular_isbns(
        genre=['History', 'Civil War'], top=5, match_mode='all'
    ),
)

Top 20 popular books (ISBNs): ['439064864', '553588486', '62024035', '1594489505', '553381695', '450040186', '142437204', '756404738', '451526341', '312330871', '385199570', '1423140605', '1416975888', '1451648537', '62059939', '1423146727', '743454537', '1619630621', '1416914293', '66238501']
Top 10 History books (ISBNs): ['1451648537', '141014083', '067002581X', '679745580', '679729771', '345465083', '078670621X', '393324818', '743264738', '571212921']
Top 5 History OR Civil War (ISBNs): ['1451648537', '141014083', '067002581X', '679745580', '679729771']
Top 5 History AND Civil War (ISBNs): ['805093079', '60518502', '395518482', '385532415', '60937165']


In [12]:
import joblib

# Serialize the fitted recommender to a file in the current working directory.
# NOTE(review): pickle records a class by its module path, and this instance's
# class was defined in the notebook itself, so loading the file from another
# process presumably needs the same class importable under that name — confirm.
joblib.dump(recommender, 'good_book_pop_rec.pkl')

['good_book_pop_rec.pkl']

In [8]:
import joblib

# Reload the pickled recommender to check that it round-trips.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# produced by this project. The PopularityRecommender class must already be
# defined/importable in the kernel when this cell runs.
rec = joblib.load('good_book_pop_rec.pkl')

In [9]:
# Sanity-check the reloaded object with a genre-filtered query.
rec.get_popular_isbns(genre='Magic', top=10)

['439064864',
 '756404738',
 '1619630621',
 '1416914293',
 '1619634449',
 '375826696',
 '185549664X',
 '61142026',
 '451464400',
 '1423160916']

In [None]:
import pandas as pd
import joblib
# This import rebinds the name PopularityRecommender, shadowing the class
# defined earlier in this notebook.
# NOTE(review): presumably done so the pickle references App.utils rather than
# the notebook's namespace — confirm that App.utils matches the class above.
from App.utils import PopularityRecommender

# `df` is not loaded in this cell; it must already exist in the kernel.
recommender = PopularityRecommender(df, min_ratings=10)

# Overwrites any existing good_book_pop_rec.pkl in the working directory.
joblib.dump(recommender, "good_book_pop_rec.pkl")
print("✅ Model saved successfully!")

✅ Model saved successfully!
