In [908]:
import numpy as np
import pandas as pd

In [909]:
books = pd.read_csv('Books.csv')
users = pd.read_csv('users.csv')
ratings = pd.read_csv('Ratings.csv')

In [910]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [911]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [912]:
print(books.shape)
print(ratings.shape)
print(users.shape)

(406, 12)
(1149780, 3)
(278858, 3)


In [913]:
books.isnull().sum()

ISBN                           0
Book-Title                     0
Book-Author                    0
Author l-f                     0
Additional Authors            45
Average Rating                 0
Publisher                      4
Binding                       12
Number of Pages               29
Year Published               194
Original Publication Year    220
Date Added                     0
dtype: int64

In [914]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [915]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [916]:
books.duplicated().sum()

0

In [917]:
ratings.duplicated().sum()

0

In [918]:
users.duplicated().sum()

0

## Popularity Based Recommender System

In [919]:
ratings_with_name = ratings.merge(books,on='ISBN')
ratings_with_name.shape

(5040, 14)

In [920]:
num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_rating_df.rename(columns={'Book-Rating':'num_ratings'},inplace=True)
num_rating_df
num_rating_df.shape

(72, 2)

In [921]:
avg_rating_df = ratings_with_name.groupby('Book-Title').mean()['Book-Rating'].reset_index()
avg_rating_df.rename(columns={'Book-Rating':'avg_rating'},inplace=True)
avg_rating_df

Unnamed: 0,Book-Title,avg_rating
0,?? ???? ????? ????? ??????? [Aaja Pheri Arko T...,4.900175
1,?? ????? ? [Aaja Ramita Chha],4.904762
2,?? ????? [Jaya Bhundi],5.730769
3,"??? ?????? ??? ?????? [Khana Pugos, Dina Pugos]",3.807692
4,??? ??????? [Tin Sambatsar],4.157895
...,...,...
67,Coma - a political sex,4.839286
68,Dreams from Nepal: The Emotional Story of a Tw...,4.576923
69,Palpasa Caf�,6.750000
70,That's My Love Story,4.063492


In [922]:
popular_df = num_rating_df.merge(avg_rating_df,on='Book-Title')
popular_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,?? ???? ????? ????? ??????? [Aaja Pheri Arko T...,571,4.900175
1,?? ????? ? [Aaja Ramita Chha],63,4.904762
2,?? ????? [Jaya Bhundi],52,5.730769
3,"??? ?????? ??? ?????? [Khana Pugos, Dina Pugos]",26,3.807692
4,??? ??????? [Tin Sambatsar],57,4.157895
...,...,...,...
67,Coma - a political sex,56,4.839286
68,Dreams from Nepal: The Emotional Story of a Tw...,26,4.576923
69,Palpasa Caf�,4,6.750000
70,That's My Love Story,63,4.063492


In [923]:
popular_df = popular_df.sort_values('avg_rating',ascending=False)

In [924]:
popular_df = popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title', 'ISBN','Book-Author','num_ratings','avg_rating']]

In [925]:
popular_df

Unnamed: 0,Book-Title,ISBN,Book-Author,num_ratings,avg_rating
0,?????????? [Pandulipi],684874350,Krishna Dharabasi,1,10.000000
1,???? ??? ????? ? ??????? [Kehi Katha Kavita ra...,385504209,Jagadish Ghimire,2,9.000000
2,??????? [Samantar],439139597,Ishwor Ballav,1,9.000000
3,???? [Dabali],1844262553,Rajeshwar Devkota,54,7.962963
4,???? ?????? ???? ? ???????? ????? [General Bhi...,156007754,Chittaranjan Nepali,1,7.000000
...,...,...,...,...,...
68,???????? ??? [Mancheko Nach],1841219266,Bijaya Malla,1,0.000000
69,??????????? [Agnisparsha],185854176X,Sharada Sharma,1,0.000000
70,??????? [Maharani],156619301X,Chandra Prakash Baniya,3,0.000000
71,????? [Basain],055321215X,Lil Bahadur Chettri,3,0.000000


## Collaborative Filtering Based Recommender System

In [939]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 5
padhe_likhe_users = x[x].index

Int64Index([  6251,   6575,   7346,  11601,  11676,  16795,  21014,  29259,
             32440,  35857,  35859,  40943,  43246,  55492,  60244,  66942,
             78834,  85426,  87555,  95173,  95359,  98741, 110912, 112881,
            127233, 135149, 148744, 156150, 171118, 174304, 178667, 185233,
            204864, 211426, 224138, 225989, 230522, 232131, 238120, 260897,
            264317, 271448],
           dtype='int64', name='User-ID')


In [927]:
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(padhe_likhe_users)]
filtered_rating.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Author l-f,Additional Authors,Average Rating,Publisher,Binding,Number of Pages,Year Published,Original Publication Year,Date Added
0,276788,043935806X,7,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
1,278356,043935806X,10,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
2,254,043935806X,0,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
3,2033,043935806X,9,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
4,4896,043935806X,0,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022


In [928]:
y = filtered_rating.groupby('Book-Title').count()['Book-Rating']>=50
famous_books = y[y].index

In [929]:
final_ratings = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]
final_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Author l-f,Additional Authors,Average Rating,Publisher,Binding,Number of Pages,Year Published,Original Publication Year,Date Added
0,276788,043935806X,7,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
1,278356,043935806X,10,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
2,254,043935806X,0,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
3,2033,043935806X,9,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022
4,4896,043935806X,0,??????? [Tathakathit],Dhruba Chandra Gautam,"Gautam, Dhruba Chandra",??????????? ????,3.5,???? ???????,Paperback,206.0,2005.0,2002.0,12/5/2022


In [930]:
pt = final_ratings.pivot_table(index='Book-Title',columns='User-ID',values='Book-Rating')

In [931]:
pt.fillna(0,inplace=True)

In [932]:
pt

User-ID,254,503,660,805,882,901,929,1083,1254,1424,...,278160,278162,278257,278356,278418,278541,278563,278633,278832,278843
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
?? ???? ????? ????? ??????? [Aaja Pheri Arko Tanna Pherincha],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
?? ????? ? [Aaja Ramita Chha],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
?? ????? [Jaya Bhundi],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
??? ??????? [Tin Sambatsar],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???! ???????? [Aama! Januhos],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???- ???? ??????????? [Soch - euta aatmabishwas],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? ?? [Brishav Vadh],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
???? ??? [Seto Bagh],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? ????? [Doshi Chashma],0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? [Chuli],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [933]:
from sklearn.metrics.pairwise import cosine_similarity

In [934]:
similarity_scores = cosine_similarity(pt)

In [935]:
similarity_scores.shape

(44, 44)

In [942]:
def recommend(book_name):
    # index fetch
    index = np.where(pt.index==book_name)[0][0]
    similar_items = sorted(list(enumerate(similarity_scores[index])),key=lambda x:x[1],reverse=True)[1:5]
    
    data = []
    for i in similar_items:
        item = []
        temp_df = books[books['Book-Title'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        
        data.append(item)
    
    return data

In [943]:
recommend('???? [Dabali]')

[['?????? ????? [Godhuli Sansar]', 'Shankar Lamichhane'],
 ['???????? ?????? [Chakamannama Ekanta]', 'Dhruba Chandra Gautam'],
 ['????????? ?????? [Pratinidhi Kathaharu]', 'Parashu Pradhan'],
 ['???????? ??? [Narendra Dai]', 'Bishweshwar Prasad Koirala']]

In [945]:
pt.index[545]

User-ID,254,503,660,805,882,901,929,1083,1254,1424,...,278160,278162,278257,278356,278418,278541,278563,278633,278832,278843
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
?? ???? ????? ????? ??????? [Aaja Pheri Arko Tanna Pherincha],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0
?? ????? ? [Aaja Ramita Chha],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
?? ????? [Jaya Bhundi],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
??? ??????? [Tin Sambatsar],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???! ???????? [Aama! Januhos],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???- ???? ??????????? [Soch - euta aatmabishwas],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? ?? [Brishav Vadh],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0
???? ??? [Seto Bagh],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? ????? [Doshi Chashma],0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
???? [Chuli],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
import pickle
pickle.dump(popular_df,open('popular.pkl','wb'))

In [None]:
books.drop_duplicates('Book-Title')

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271354,0449906736,Flashpoints: Promise and Peril in a New World,Robin Wright,1993,Ballantine Books,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...
271356,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [None]:
pickle.dump(pt,open('pt.pkl','wb'))
pickle.dump(books,open('books.pkl','wb'))
pickle.dump(similarity_scores,open('similarity_scores.pkl','wb'))