In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings as war
war.filterwarnings("ignore")
print("Imported")

Imported


In [2]:
## reading the csv files
print("="*100)
print("Importing Books CSV file")
print("-"*100)
books = pd.read_csv("Datasets/Books.csv")
print(f"Shape of the Books CSV File {books.shape}")
print("="*100)
print("Importing Ratings CSV file")
print("-"*100)
ratings = pd.read_csv("Datasets/Ratings.csv")
print(f"Shape of the Ratings CSV File {ratings.shape}")
print("="*100)
print("Importing Users CSV file")
print("-"*100)
users = pd.read_csv("Datasets/Users.csv")
print(f"Shape of the Users CSV File {users.shape}")
print("="*100)

Importing Books CSV file
----------------------------------------------------------------------------------------------------
Shape of the Books CSV File (271360, 8)
Importing Ratings CSV file
----------------------------------------------------------------------------------------------------
Shape of the Ratings CSV File (1149780, 3)
Importing Users CSV file
----------------------------------------------------------------------------------------------------
Shape of the Users CSV File (278858, 3)


In [4]:
## checking for null values
print("="*100)
print("Null Values for Books CSV file")
print("-"*100)
print(books.isnull().sum().tolist())
print("="*100)
print("Null Values for Ratings CSV file")
print("-"*100)
print(ratings.isnull().sum().tolist())
print("="*100)
print("Null Values for Users CSV file")
print("-"*100)
print(users.isnull().sum().tolist())
print("="*100)

Null Values for Books CSV file
----------------------------------------------------------------------------------------------------
[0, 0, 1, 0, 2, 0, 0, 3]
Null Values for Ratings CSV file
----------------------------------------------------------------------------------------------------
[0, 0, 0]
Null Values for Users CSV file
----------------------------------------------------------------------------------------------------
[0, 0, 110762]


## Popularity Based Recommender System

In [5]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [190]:
books['Book-Title'].nunique(), books.shape[0]

(242135, 271360)

In [6]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [191]:
ratings['User-ID'].nunique(), ratings.shape[0]

(105283, 1149780)

In [8]:
ratings_with_name = pd.merge(ratings, books, on="ISBN")
ratings_with_name.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [192]:
ratings_with_name['User-ID'].nunique(), ratings_with_name['ISBN'].nunique(), ratings_with_name['Book-Title'].nunique(), ratings_with_name.shape[0]

(92106, 270151, 241071, 1031136)

In [195]:
ratings_with_name[ratings_with_name['Book-Title']=='The Lovely Bones: A Novel'].shape

(1295, 10)

In [30]:
total_ratings_books = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()
total_ratings_books.rename(columns = {'Book-Rating':'num_ratings'}, inplace=True)

In [32]:
total_ratings_books.sort_values('num_ratings', ascending=False).head()

Unnamed: 0,Book-Title,num_ratings
234951,Wild Animus,2502
196326,The Lovely Bones: A Novel,1295
183573,The Da Vinci Code,898
5303,A Painted House,838
199237,The Nanny Diaries: A Novel,828


In [37]:
average_ratings_books = ratings_with_name.groupby('Book-Title').mean()['Book-Rating'].reset_index()
average_ratings_books.rename(columns = {'Book-Rating': 'average_rating'}, inplace=True)
average_ratings_books.sort_values('average_rating', ascending=False).head()

Unnamed: 0,Book-Title,average_rating
66223,Film Is: The International Free Cinema,10.0
123125,More Secrets of Happy Children: Embrace Your P...,10.0
96558,Jo's Boys : From the Original Publisher,10.0
212107,The Vanished Priestess : An Annie Szabo Mystery,10.0
72503,Game and Hunting,10.0


In [38]:
popularity_books = pd.merge(total_ratings_books, average_ratings_books, on="Book-Title")
popularity_books.head()

Unnamed: 0,Book-Title,num_ratings,average_rating
0,A Light in the Storm: The Civil War Diary of ...,4,2.25
1,Always Have Popsicles,1,0.0
2,Apple Magic (The Collector's series),1,0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.0
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.0


In [48]:
popular_df = popularity_books[popularity_books['num_ratings'] >= 400].sort_values('average_rating', ascending=False)

In [52]:
print(popular_df.shape)
popular_df.head()

(64, 3)


Unnamed: 0,Book-Title,num_ratings,average_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
219741,To Kill a Mockingbird,510,4.7
183573,The Da Vinci Code,898,4.642539


In [53]:
details_of_popular_books = pd.merge(popular_df, books, on='Book-Title')

In [58]:
details_of_popular_books.drop_duplicates('Book-Title', inplace=True)

In [60]:
print(details_of_popular_books.shape)
details_of_popular_books.head(2)

(64, 10)


Unnamed: 0,Book-Title,num_ratings,average_rating,ISBN,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804,439136350,J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453,439064872,J. K. Rowling,2000,Scholastic,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...,http://images.amazon.com/images/P/0439064872.0...


## Collaborative Filtering

In [66]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 200
true_indexes = x[x].index

In [72]:
filtered_ratings = ratings_with_name[ratings_with_name['User-ID'].isin(true_indexes)]

In [94]:
y = filtered_ratings.groupby('Book-Title').count()['Book-Rating']>=50
famous_books = y[y].index

In [95]:
final_ratings = filtered_ratings[filtered_ratings['Book-Title'].isin(famous_books)]

In [96]:
rating_matrix = final_ratings.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')

In [97]:
rating_matrix.fillna(0, inplace=True)
rating_matrix

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [98]:
from sklearn.metrics.pairwise import cosine_similarity

In [112]:
sim = cosine_similarity(rating_matrix)

In [185]:
def recommend(book_name):
    get_index = np.where(rating_matrix.index==book_name)[0][0]
    distances = sim[get_index]
    sim_books = sorted(list(enumerate(sim[get_index])), key=lambda x: x[1], reverse = True)[1:10]
    top_rec_books = []
    for i in sim_books:
        top_rec_books.append(rating_matrix.index[i[0]])
    return top_rec_books
    

In [186]:
recommend('Exclusive')

['The Long Road Home',
 'Eyes of a Child',
 'Fine Things',
 'Secrets',
 'The Cradle Will Fall',
 'Message from Nam',
 'Before I Say Good-Bye',
 'Unspeakable',
 'Kaleidoscope']

In [187]:
get_list_of_recommended_books = recommend('Exclusive')
get_list_of_recommended_books

['The Long Road Home',
 'Eyes of a Child',
 'Fine Things',
 'Secrets',
 'The Cradle Will Fall',
 'Message from Nam',
 'Before I Say Good-Bye',
 'Unspeakable',
 'Kaleidoscope']

In [188]:
books[books['Book-Title'].isin(get_list_of_recommended_books)].drop_duplicates('Book-Title')

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
136,671004573,Before I Say Good-Bye,Mary Higgins Clark,2001,Pocket,http://images.amazon.com/images/P/0671004573.0...,http://images.amazon.com/images/P/0671004573.0...,http://images.amazon.com/images/P/0671004573.0...
2180,884092097,Kaleidoscope,June Stepansky,2000,A Different Voice Publishing Co.,http://images.amazon.com/images/P/0884092097.0...,http://images.amazon.com/images/P/0884092097.0...,http://images.amazon.com/images/P/0884092097.0...
4006,446607193,Unspeakable,Sandra Brown,1999,Warner Books,http://images.amazon.com/images/P/0446607193.0...,http://images.amazon.com/images/P/0446607193.0...,http://images.amazon.com/images/P/0446607193.0...
10643,440200563,Fine Things,Danielle Steel,1988,Dell,http://images.amazon.com/images/P/0440200563.0...,http://images.amazon.com/images/P/0440200563.0...,http://images.amazon.com/images/P/0440200563.0...
12328,440176484,Secrets,DANIELLE STEEL,1986,Dell,http://images.amazon.com/images/P/0440176484.0...,http://images.amazon.com/images/P/0440176484.0...,http://images.amazon.com/images/P/0440176484.0...
12386,385319568,The Long Road Home,DANIELLE STEEL,1998,Delacorte Press,http://images.amazon.com/images/P/0385319568.0...,http://images.amazon.com/images/P/0385319568.0...,http://images.amazon.com/images/P/0385319568.0...
17698,440209412,Message from Nam,Danielle Steel,1991,Dell,http://images.amazon.com/images/P/0440209412.0...,http://images.amazon.com/images/P/0440209412.0...,http://images.amazon.com/images/P/0440209412.0...
19092,679429883,Eyes of a Child,Richard North Patterson,1994,Random House Inc,http://images.amazon.com/images/P/0679429883.0...,http://images.amazon.com/images/P/0679429883.0...,http://images.amazon.com/images/P/0679429883.0...
20838,671741195,The Cradle Will Fall,Mary Higgins Clark,1991,Pocket,http://images.amazon.com/images/P/0671741195.0...,http://images.amazon.com/images/P/0671741195.0...,http://images.amazon.com/images/P/0671741195.0...
