In [1]:
import pandas as pd
import numpy as np
import os

In [36]:
# Load the pickled dataframes produced earlier in the project.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
books_pickle_path = '../artifacts/books.pkl'
ratings_pickle_path = '../artifacts/ratings.pkl'

final_books_df = pd.read_pickle(books_pickle_path)
ratings_df = pd.read_pickle(ratings_pickle_path)

In [13]:
# Count the number of ratings each book has received.
# A high average rating is meaningless if only one person has rated the book,
# so the rating count is needed alongside the average.

# Select 'Book-Rating' *before* aggregating so only that column is counted
# (count() ignores missing values), then name the resulting column directly
# instead of renaming in place afterwards.
num_ratings_df = (
    ratings_df
    .groupby('ISBN')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)

num_ratings_df

Unnamed: 0,ISBN,num_ratings
0,0330299891,2
1,0375404120,2
2,0586045007,1
3,9022906116,2
4,9032803328,1
...,...,...
340551,cn113107,1
340552,ooo7156103,1
340553,§423350229,1
340554,´3499128624,1


In [25]:
# Attach the per-book rating counts ('num_ratings') to the main book dataframe.
# Both frames share the 'ISBN' column, which serves as the join key; an inner
# join keeps only ISBNs present in both frames.
popular_book_df = pd.merge(
    final_books_df,
    num_ratings_df,
    how='inner',
    on='ISBN',
)
popular_book_df

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Avg-Book-Rating,Image-URL-M,num_ratings
0,0590567330,A Light in the Storm: The Civil War Diary of ...,Karen Hesse,1999,Hyperion Books for Children,2.250000,http://images.amazon.com/images/P/0590567330.0...,4
1,0964147726,Always Have Popsicles,Rebecca Harvin,1994,Rebecca L. Harvin,0.000000,http://images.amazon.com/images/P/0964147726.0...,1
2,0942320093,Apple Magic (The Collector's series),Martina Boudreau,1984,Amer Cooking Guild,0.000000,http://images.amazon.com/images/P/0942320093.0...,1
3,0310232546,"Ask Lily (Young Women of Faith: Lily Series, ...",Nancy N. Rue,2001,Zonderkidz,8.000000,http://images.amazon.com/images/P/0310232546.0...,1
4,0962295701,Beyond IBM: Leadership Marketing and Finance ...,Lou Mobley,1989,"Teleonet, Incorporated",0.000000,http://images.amazon.com/images/P/0962295701.0...,1
...,...,...,...,...,...,...,...,...
269210,3499232499,Ã?Â?lpiraten.,Janwillem van de Wetering,2002,Rowohlt Tb.,0.000000,http://images.amazon.com/images/P/3499232499.0...,2
269211,325721538X,Ã?Â?rger mit Produkt X. Roman.,Joan Aiken,1987,Diogenes Verlag,5.250000,http://images.amazon.com/images/P/325721538X.0...,4
269212,3451274973,Ã?Â?sterlich leben.,Anselm GrÃ?Â¼n,2001,"Herder, Freiburg",7.000000,http://images.amazon.com/images/P/3451274973.0...,1
269213,3442725739,Ã?Â?stlich der Berge.,David Guterson,2000,btb,2.666667,http://images.amazon.com/images/P/3442725739.0...,3


In [26]:
# Keep only the truly popular books: those with at least `min_num_ratings`
# ratings (250 is the threshold chosen for this analysis).
min_num_ratings = 250

has_enough_ratings = popular_book_df['num_ratings'] >= min_num_ratings
popular_book_df = popular_book_df.loc[has_enough_ratings]
popular_book_df

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Avg-Book-Rating,Image-URL-M,num_ratings
869,0446610038,1st to Die: A Novel,James Patterson,2002,Warner Vision,3.498721,http://images.amazon.com/images/P/0446610038.0...,391
1879,0446611867,A Bend in the Road,Nicholas Sparks,2002,Warner Books,3.264000,http://images.amazon.com/images/P/0446611867.0...,250
2441,1558743669,"A Child Called \It\"": One Child's Courage to S...",Dave Pelzer,1995,Health Communications,4.091603,http://images.amazon.com/images/P/1558743669.0...,262
4313,0375725784,A Heartbreaking Work of Staggering Genius,Dave Eggers,2001,Vintage Books USA,3.423841,http://images.amazon.com/images/P/0375725784.0...,302
4576,0553279912,A Is for Alibi (Kinsey Millhone Mysteries (Pap...,SUE GRAFTON,1987,Bantam,2.733333,http://images.amazon.com/images/P/0553279912.0...,285
...,...,...,...,...,...,...,...,...
260919,0345443284,While I Was Gone,Sue Miller,1999,Ballantine Books,2.654795,http://images.amazon.com/images/P/0345443284.0...,365
261310,0316569321,White Oleander : A Novel,Janet Fitch,1999,"Little, Brown",3.396226,http://images.amazon.com/images/P/0316569321.0...,265
261312,0316284955,White Oleander : A Novel (Oprah's Book Club),Janet Fitch,2000,Back Bay Books,3.772472,http://images.amazon.com/images/P/0316284955.0...,356
262263,0060987103,Wicked: The Life and Times of the Wicked Witch...,Gregory Maguire,1996,Regan Books,3.766871,http://images.amazon.com/images/P/0060987103.0...,326


In [27]:
# Rank the popular books.
# With rarely-rated books already filtered out, order the remaining ones by
# average rating, highest first, so the "best of the best" come out on top.
popular_book_df = popular_book_df.sort_values(
    by='Avg-Book-Rating',
    ascending=False,
)
popular_book_df

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Avg-Book-Rating,Image-URL-M,num_ratings
89599,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,6.785928,http://images.amazon.com/images/P/043935806X.0...,334
225608,006101351X,The Perfect Storm : A True Story of Men Agains...,Sebastian Junger,1998,HarperTorch,5.196429,http://images.amazon.com/images/P/006101351X.0...,364
213902,0345339681,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,Del Rey,5.007117,http://images.amazon.com/images/P/0345339681.0...,281
245692,0446310786,To Kill a Mockingbird,Harper Lee,1988,Little Brown &amp; Company,4.920308,http://images.amazon.com/images/P/0446310786.0...,389
89585,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic,4.729345,http://images.amazon.com/images/P/0439064872.0...,351
...,...,...,...,...,...,...,...,...
181376,067976402X,Snow Falling on Cedars,David Guterson,1995,Vintage Books USA,2.252850,http://images.amazon.com/images/P/067976402X.0...,614
254499,0380730138,Vinegar Hill (Oprah's Book Club (Paperback)),A. Manette Ansay,1998,Perennial,2.245283,http://images.amazon.com/images/P/0380730138.0...,265
58323,0345391055,Disclosure,Michael Crichton,1994,Ballantine Books,2.216216,http://images.amazon.com/images/P/0345391055.0...,259
200092,044651652X,The Bridges of Madison County,Robert James Waller,1992,Warner Books,1.550272,http://images.amazon.com/images/P/044651652X.0...,368


In [28]:
# Take the first 50 rows of the rating-sorted frame as the top-50 list.
top_n = 50
top_50_popular_book = popular_book_df.iloc[:top_n]
top_50_popular_book

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Avg-Book-Rating,Image-URL-M,num_ratings
89599,043935806X,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,6.785928,http://images.amazon.com/images/P/043935806X.0...,334
225608,006101351X,The Perfect Storm : A True Story of Men Agains...,Sebastian Junger,1998,HarperTorch,5.196429,http://images.amazon.com/images/P/006101351X.0...,364
213902,0345339681,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,Del Rey,5.007117,http://images.amazon.com/images/P/0345339681.0...,281
245692,0446310786,To Kill a Mockingbird,Harper Lee,1988,Little Brown &amp; Company,4.920308,http://images.amazon.com/images/P/0446310786.0...,389
89585,0439064872,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic,4.729345,http://images.amazon.com/images/P/0439064872.0...,351
204759,0385504209,The Da Vinci Code,Dan Brown,2003,Doubleday,4.652322,http://images.amazon.com/images/P/0385504209.0...,883
201413,0316769487,The Catcher in the Rye,J.D. Salinger,1991,"Little, Brown",4.635236,http://images.amazon.com/images/P/0316769487.0...,403
89627,059035342X,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,Arthur A. Levine Books,4.575088,http://images.amazon.com/images/P/059035342X.0...,571
209626,0786868716,The Five People You Meet in Heaven,Mitch Albom,2003,Hyperion,4.543326,http://images.amazon.com/images/P/0786868716.0...,427
209059,0345339703,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986,Del Rey,4.505837,http://images.amazon.com/images/P/0345339703.0...,257


In [31]:
# Trim the top-50 frame down to the columns needed in the final output.
output_columns = [
    'Book-Title',
    'Book-Author',
    'Image-URL-M',
    'num_ratings',
    'Avg-Book-Rating',
]

# drop=True discards the old row labels so the index runs from 0 upwards.
final_popular_book = (
    top_50_popular_book
    .loc[:, output_columns]
    .reset_index(drop=True)
)

final_popular_book

Unnamed: 0,Book-Title,Book-Author,Image-URL-M,num_ratings,Avg-Book-Rating
0,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,http://images.amazon.com/images/P/043935806X.0...,334,6.785928
1,The Perfect Storm : A True Story of Men Agains...,Sebastian Junger,http://images.amazon.com/images/P/006101351X.0...,364,5.196429
2,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339681.0...,281,5.007117
3,To Kill a Mockingbird,Harper Lee,http://images.amazon.com/images/P/0446310786.0...,389,4.920308
4,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,http://images.amazon.com/images/P/0439064872.0...,351,4.729345
5,The Da Vinci Code,Dan Brown,http://images.amazon.com/images/P/0385504209.0...,883,4.652322
6,The Catcher in the Rye,J.D. Salinger,http://images.amazon.com/images/P/0316769487.0...,403,4.635236
7,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,http://images.amazon.com/images/P/059035342X.0...,571,4.575088
8,The Five People You Meet in Heaven,Mitch Albom,http://images.amazon.com/images/P/0786868716.0...,427,4.543326
9,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339703.0...,257,4.505837


In [38]:
# Persist the final popular-books dataframe as a pickle in the artifacts folder.
artifacts_folder = '../artifacts'
popular_books_path = os.path.join(artifacts_folder, 'popular_books.pkl')

final_popular_book.to_pickle(popular_books_path)

In [35]:

# Show the kernel's working directory; the relative '../artifacts' paths used
# in this notebook are resolved against it.
# NOTE(review): the printed path is machine-specific -- consider clearing this
# output before sharing the notebook.
working_dir = os.getcwd()
print(working_dir)

/Users/mdshameemalam/Desktop/Project/Book-Recommendation-System/ml_model_popularity_based


In [39]:
# List the artifacts folder to confirm which files are present after saving.
artifacts_listing_dir = '../artifacts'
os.listdir(artifacts_listing_dir)

['ratings.pkl',
 'books.pkl',
 'users.pkl',
 '.ipynb_checkpoints',
 'popular_books.pkl']