In [216]:
import pandas as pd
import numpy as np

In [217]:
books = pd.read_csv("./Datasets/Books.csv")
ratings = pd.read_csv("./Datasets/Ratings.csv")
users = pd.read_csv("./Datasets/Users.csv")

  books = pd.read_csv("./Datasets/Books.csv")


In [218]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [219]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [220]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [221]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [222]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [223]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [224]:
books.duplicated().sum()

0

In [225]:
ratings.duplicated().sum()

0

In [226]:
users.duplicated().sum()

0

## Recommendation System By Popularity

The popularity-based book recommendation system implemented in this Jupyter Notebook follows a simple approach. It selects books that have a specified number of ratings and calculates the average rating for each book. The system then sorts the books based on their average rating in descending order.

To begin, the system merges the "books" and "ratings" dataframes to obtain a combined dataset. This dataset contains information about each book, including its title, author, publication year, publisher, and image URLs, along with the corresponding user ratings.

Next, the system performs some data preprocessing steps. It calculates the number of ratings received by each book and stores it in the "Number of Ratings" column of the "num_of_ratings" dataframe. Additionally, it calculates the average rating for each book and stores it in the "Average Rating" column of the "average_rating" dataframe.

Once the preprocessing is complete, the system merges the "num_of_ratings" and "average_rating" dataframes to obtain the "popular_books" dataframe. This dataframe contains the book titles, the number of ratings received, and the average rating for each book.

To generate the final recommendation, the system filters the "popular_books" dataframe based on a specified threshold for the number of ratings. Only books with a number of ratings above this threshold are considered. The filtered dataframe is then sorted based on the average rating in descending order, ensuring that the most popular books with higher average ratings appear at the top.

The resulting recommendation list provides users with a selection of popular books that have received a significant number of ratings and have high average ratings. This popularity-based approach allows users to discover books that are well-received by a large number of readers.

In [227]:
books_with_ratings = books.merge(ratings, on="ISBN")

In [228]:
x = books_with_ratings.groupby("ISBN").count() > 1
x[x].count()

Book-Title             124504
Book-Author            124504
Year-Of-Publication    124504
Publisher              124504
Image-URL-S            124504
Image-URL-M            124504
Image-URL-L            124503
User-ID                124504
Book-Rating            124504
dtype: int64

In [229]:
num_of_ratings = books_with_ratings.groupby("Book-Title").count().reset_index()
num_of_ratings.rename(columns={"Book-Rating": "Number of Ratings"}, inplace=True)
num_of_ratings["Number of Ratings"]

0         4
1         1
2         1
3         1
4         1
         ..
241066    2
241067    4
241068    1
241069    3
241070    2
Name: Number of Ratings, Length: 241071, dtype: int64

In [230]:
average_rating = books_with_ratings.groupby("Book-Title")["Book-Rating"].mean().reset_index()
average_rating.rename(columns={"Book-Rating": "Average Rating"}, inplace=True)
average_rating["Average Rating"] = average_rating["Average Rating"].round(2)
average_rating

Unnamed: 0,Book-Title,Average Rating
0,A Light in the Storm: The Civil War Diary of ...,2.25
1,Always Have Popsicles,0.00
2,Apple Magic (The Collector's series),0.00
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.00
4,Beyond IBM: Leadership Marketing and Finance ...,0.00
...,...,...
241066,Ã?Â?lpiraten.,0.00
241067,Ã?Â?rger mit Produkt X. Roman.,5.25
241068,Ã?Â?sterlich leben.,7.00
241069,Ã?Â?stlich der Berge.,2.67


In [231]:
popular_books = num_of_ratings.merge(average_rating, on="Book-Title")
popular_books = popular_books[popular_books["Number of Ratings"] > 200]
popular_books = popular_books.sort_values(by="Average Rating", ascending=False)
popular_books

Unnamed: 0,Book-Title,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,User-ID,Number of Ratings,Average Rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,428,428,428,428,428,428,428,428,5.85
80422,Harry Potter and the Goblet of Fire (Book 4),387,387,387,387,387,387,387,387,387,5.82
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,278,278,278,278,278,278,278,278,5.74
80426,Harry Potter and the Order of the Phoenix (Boo...,347,347,347,347,347,347,347,347,347,5.50
60582,Ender's Game (Ender Wiggins Saga (Paperback)),249,249,249,249,249,249,249,249,249,5.41
...,...,...,...,...,...,...,...,...,...,...,...
27600,Breathing Lessons,239,239,239,239,239,239,239,239,239,2.12
94382,Isle of Dogs,288,288,288,288,288,288,288,288,288,2.00
162153,Slow Waltz in Cedar Bend,248,248,248,248,248,248,248,248,248,1.90
163913,Songs in Ordinary Time (Oprah's Book Club (Pap...,232,232,232,232,232,232,232,232,232,1.86


In [232]:
popular_books = popular_books.merge(books, on="Book-Title").drop_duplicates().filter(["Book-Title", "Book-Author", "Year-Of-Publication", "Publisher", "Number of Ratings", "Average Rating", "Image-URL-M", "ISBN"])
popular_books

Unnamed: 0,Book-Title,Number of Ratings,Average Rating
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.85
1,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.85
2,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.85
3,Harry Potter and the Goblet of Fire (Book 4),387,5.82
4,Harry Potter and the Goblet of Fire (Book 4),387,5.82
...,...,...,...
1118,Slow Waltz in Cedar Bend,248,1.90
1119,Slow Waltz in Cedar Bend,248,1.90
1120,Slow Waltz in Cedar Bend,248,1.90
1121,Songs in Ordinary Time (Oprah's Book Club (Pap...,232,1.86


## Colaborative Based Recommendation System

This system follows a collaborative filtering approach to recommend books to users. Here's how it works:

1. First, we identify the users who have given the most ratings. This is done by calculating the count of ratings given by each user and selecting the users with the highest counts. These users are considered reliable and their ratings are used for recommendation.

2. Next, we filter the books based on their ratings. We select the books that have ratings falling within a specified range. This range can be customized based on the desired criteria for recommendation.

3. Once we have the reliable users and the filtered books, we create a pivot table. This pivot table represents the ratings given by the reliable users for each book. The rows of the pivot table represent the books, and the columns represent the reliable users.

4. We then calculate the cosine similarity between the books based on the ratings given by the reliable users. Cosine similarity measures the similarity between two vectors by calculating the cosine of the angle between them. In this case, the vectors represent the ratings given by the reliable users for each book.

5. Finally, we use the cosine similarity scores to recommend similar books. For a given book, we identify the most similar books based on the cosine similarity scores. These similar books are then recommended to the user.

This collaborative filtering approach leverages the ratings given by reliable users to find similar books and provide personalized recommendations.

In [233]:
x = books_with_ratings.groupby("User-ID").count()["Book-Rating"] > 100
reliable_users_indexes = x[x].index

In [234]:
ratings = books_with_ratings[books_with_ratings["User-ID"].isin(reliable_users_indexes)]

In [235]:
ratings

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,User-ID,Book-Rating
3,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,11676,8
6,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,85526,0
7,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,96054,0
10,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,177458,0
13,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,219008,7
...,...,...,...,...,...,...,...,...,...,...
1031128,1845170423,Cocktail Classics,David Biggs,2004,Connaught,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...,275970,7
1031131,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,276463,7
1031133,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,276680,0
1031134,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,276680,0


In [236]:
y = ratings.groupby("Book-Title").count()["Book-Rating"] > 100
reliable_books_indexes = y[y].index

In [237]:
reliable_books_indexes.shape

(299,)

In [238]:
ratings = ratings[ratings["Book-Title"].isin(reliable_books_indexes)]
ratings

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,User-ID,Book-Rating
31,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,11676,9
32,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,29526,9
33,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,36836,0
34,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,46398,9
38,0399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,113270,0
...,...,...,...,...,...,...,...,...,...,...
1027889,0060093102,Mystic River,Dennis Lehane,2002,Harpercollins,http://images.amazon.com/images/P/0060093102.0...,http://images.amazon.com/images/P/0060093102.0...,http://images.amazon.com/images/P/0060093102.0...,238120,0
1028815,0743527631,The Pillars of the Earth,Ken Follett,2002,Encore,http://images.amazon.com/images/P/0743527631.0...,http://images.amazon.com/images/P/0743527631.0...,http://images.amazon.com/images/P/0743527631.0...,240144,0
1028817,0745168086,The Handmaid's Tale,Margaret Atwood,1999,Chivers Audio Books,http://images.amazon.com/images/P/0745168086.0...,http://images.amazon.com/images/P/0745168086.0...,http://images.amazon.com/images/P/0745168086.0...,240144,0
1030494,0553290703,Lightning,Patricia Potter,1992,Bantam Books,http://images.amazon.com/images/P/0553290703.0...,http://images.amazon.com/images/P/0553290703.0...,http://images.amazon.com/images/P/0553290703.0...,244685,9


In [239]:
books_with_ratings.where(books_with_ratings["User-ID"] == 278418).where(books_with_ratings["Book-Rating"] > 0).dropna().shape

(106, 10)

In [240]:
pt = ratings.pivot_table(index="Book-Title", columns="User-ID", values="Book-Rating")

In [241]:
pt

User-ID,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,275020,275970,276463,276680,277427,277478,277639,278137,278188,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,0.0,,,,,,,,
1st to Die: A Novel,,0.0,,,,,,,,,...,,,,,,,,,,
2nd Chance,,,,,,,,,,10.0,...,,,,,,,0.0,,,
A Bend in the Road,0.0,,0.0,,,,,,,,...,,,,,,,,,0.0,
A Case of Need,,,,,7.0,,,,,,...,,,,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wicked: The Life and Times of the Wicked Witch of the West,,,,,,,,,,,...,,,,,,,,,,
Wild Animus,,,,,5.0,1.0,0.0,,,,...,,,,,0.0,0.0,,0.0,,
Wish You Well,,,,,,,,,,,...,,,,,,,,,,
Wuthering Heights,,,,,,,,,,,...,,,,,,,,,,


In [242]:
pt.fillna(0, inplace=True)

In [243]:
pt

User-ID,254,507,882,1424,1435,1733,1903,2033,2110,2276,...,275020,275970,276463,276680,277427,277478,277639,278137,278188,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Case of Need,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wicked: The Life and Times of the Wicked Witch of the West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wild Animus,0.0,0.0,0.0,0.0,5.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wish You Well,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wuthering Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [244]:
from sklearn.metrics.pairwise import cosine_similarity

In [245]:
similarity_scores = cosine_similarity(pt)

In [246]:
similarity_scores.shape

(299, 299)

In [247]:
similarity_scores

array([[1.        , 0.06550754, 0.00805409, ..., 0.00705809, 0.04180813,
        0.03029366],
       [0.06550754, 1.        , 0.23629915, ..., 0.10249585, 0.05748184,
        0.12388301],
       [0.00805409, 0.23629915, 1.        , ..., 0.1449341 , 0.01093082,
        0.07992925],
       ...,
       [0.00705809, 0.10249585, 0.1449341 , ..., 1.        , 0.00957907,
        0.10082854],
       [0.04180813, 0.05748184, 0.01093082, ..., 0.00957907, 1.        ,
        0.02916002],
       [0.03029366, 0.12388301, 0.07992925, ..., 0.10082854, 0.02916002,
        1.        ]])

In [248]:
max_books = 6
def RecommendBooks(book_name):
    index = np.where(pt.index == book_name)[0][0]
    similar_books = sorted(list(enumerate((similarity_scores[index]))), key=lambda x: x[1], reverse=True)[1:max_books+1]
    
    recommended_books = []
    for i in similar_books:
        book = []
        temp = books[books["Book-Title"] == pt.index[i[0]]]
        book.extend(temp.drop_duplicates("Book-Title")['Book-Title'].values)
        book.extend(temp.drop_duplicates("Book-Title")['Book-Author'].values)
        book.extend(temp.drop_duplicates("Book-Title")['Publisher'].values)
        book.extend(temp.drop_duplicates("Book-Title")['Image-URL-M'].values)
        recommended_books.append(book)
        
    return recommended_books

In [249]:
RecommendBooks("A Case of Need")

[['Sphere',
  'MICHAEL CRICHTON',
  'Ballantine Books',
  'http://images.amazon.com/images/P/0345353145.01.MZZZZZZZ.jpg'],
 ['Congo',
  'Michael Crichton',
  'Ballantine Books',
  'http://images.amazon.com/images/P/0345378490.01.MZZZZZZZ.jpg'],
 ['Cruel &amp; Unusual (Kay Scarpetta Mysteries (Paperback))',
  'Patricia D. Cornwell',
  'Avon',
  'http://images.amazon.com/images/P/0380718340.01.MZZZZZZZ.jpg'],
 ['The Runaway Jury',
  'JOHN GRISHAM',
  'Dell',
  'http://images.amazon.com/images/P/0440221471.01.MZZZZZZZ.jpg'],
 ['Ashes to Ashes',
  'TAMI HOAG',
  'Bantam',
  'http://images.amazon.com/images/P/0553579606.01.MZZZZZZZ.jpg'],
 ['Saving Faith',
  'David Baldacci',
  'Warner Vision',
  'http://images.amazon.com/images/P/0446608890.01.MZZZZZZZ.jpg']]