In [60]:

import numpy as np
import pandas as pd

In [61]:
books = pd.read_csv('books.csv')
users = pd.read_csv('users.csv')
ratings = pd.read_csv('ratings.csv')

  books = pd.read_csv('books.csv')


In [62]:
books['Image-URL-M'][1]

'http://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg'

In [63]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [64]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [65]:
print(books.shape)
print(ratings.shape)
print(users.shape)

(271360, 8)
(1149780, 3)
(278858, 3)


In [66]:
books.isnull().sum()

ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [67]:
users.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [68]:
ratings.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [69]:
books.duplicated().sum()

0

In [70]:
ratings.duplicated().sum()

0

In [71]:
users.duplicated().sum()

0

## Popularity Based Recommender System

In [72]:
ratings_with_name = ratings.merge(books,on='ISBN')

In [73]:
num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_rating_df.rename(columns={'Book-Rating':'num_ratings'},inplace=True)
num_rating_df

Unnamed: 0,Book-Title,num_ratings
0,Earth Prayers From around the World: 365 Pray...,10
1,Final Fantasy Anthology: Official Strategy Gu...,4
2,Flight of Fancy: American Heiresses (Zebra Ba...,2
3,Little Comic Shop of Horrors (Give Yourself G...,4
4,Mystery Mile,2
...,...,...
68750,Ã?Â?lpiraten.,2
68751,Ã?Â?rger mit Produkt X. Roman.,4
68752,Ã?Â?sterlich leben.,1
68753,Ã?Â?stlich der Berge.,3


In [74]:
avg_rating_df = ratings_with_name.groupby('Book-Title', as_index=False)['Book-Rating'].mean()
avg_rating_df.rename(columns={'Book-Rating': 'avg_rating'}, inplace=True)

print(avg_rating_df)  # Display the dataframe


                                              Book-Title  avg_rating
0       Earth Prayers From around the World: 365 Pray...    5.000000
1       Final Fantasy Anthology: Official Strategy Gu...    5.000000
2       Flight of Fancy: American Heiresses (Zebra Ba...    4.000000
3       Little Comic Shop of Horrors (Give Yourself G...    1.250000
4                                           Mystery Mile    0.000000
...                                                  ...         ...
68750                                      Ã?Â?lpiraten.    0.000000
68751                     Ã?Â?rger mit Produkt X. Roman.    5.250000
68752                                Ã?Â?sterlich leben.    7.000000
68753                              Ã?Â?stlich der Berge.    2.666667
68754                                  Ã?Â?thique en toc    4.000000

[68755 rows x 2 columns]


In [75]:
popular_df = num_rating_df.merge(avg_rating_df,on='Book-Title')
popular_df

Unnamed: 0,Book-Title,num_ratings,avg_rating
0,Earth Prayers From around the World: 365 Pray...,10,5.000000
1,Final Fantasy Anthology: Official Strategy Gu...,4,5.000000
2,Flight of Fancy: American Heiresses (Zebra Ba...,2,4.000000
3,Little Comic Shop of Horrors (Give Yourself G...,4,1.250000
4,Mystery Mile,2,0.000000
...,...,...,...
68750,Ã?Â?lpiraten.,2,0.000000
68751,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
68752,Ã?Â?sterlich leben.,1,7.000000
68753,Ã?Â?stlich der Berge.,3,2.666667


In [76]:
popular_df = popular_df[popular_df['num_ratings']>=250].sort_values('avg_rating',ascending=False).head(50)

In [77]:
popular_df = popular_df.merge(books,on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Image-URL-M','num_ratings','avg_rating']]

In [78]:
popular_df['Image-URL-M'][0]

'http://images.amazon.com/images/P/043935806X.01.MZZZZZZZ.jpg'

## Collaborative Filtering Based Recommender System

In [79]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 200
padhe_likhe_users = x[x].index

In [89]:
padhe_likhe_users

Index([  8890,  11601,  11676,  16795,  23768,  23902,  26544,  35859,  36606,
        36836,  52584,  55492,  56399,  60244,  69697,  72992,  73394,  76352,
        76626,  78783,  87141,  93047,  95359,  98391,  98741, 102967, 110973,
       115120, 127233, 129358, 135149, 142524, 148744, 153662, 160032, 162639,
       170229, 171118, 172742, 173632, 175003, 177458, 182085, 182987, 185233,
       189334, 189835, 190925, 198711, 204864, 212898, 213350, 217740, 226545,
       227447, 230522, 231210, 232131, 234623, 235105, 238781, 241198, 242824,
       245963, 261105, 266226, 275970, 278418],
      dtype='int64', name='User-ID')

In [80]:
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(padhe_likhe_users)]

In [90]:
filtered_rating

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
5,23768,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
18,98391,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
47,227447,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
65,278418,038550120X,0,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
70,11676,038550120X,10,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
...,...,...,...,...,...,...,...,...,...,...
196714,275970,1880837927,0,The Theology of the Hammer,Millard Fuller,1994,Smyth &amp; Helwys Publishing,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...
196715,275970,188717897X,0,The Ordeal of Integration: Progress and Resent...,Orlando Patterson,1998,Civitas Book Publisher,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...
196716,275970,1888889047,0,Pushcart's Complete Rotten Reviews &amp; Rejec...,Bill Henderson,1998,Pushcart Press,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...
196717,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...


In [102]:
y = filtered_rating.groupby('Book-Title').count()['Book-Rating']>=10
famous_books = y[y].index

In [103]:
famous_books

Index(['16 Lighthouse Road', '204 Rosewood Lane', '311 Pelican Court',
       'A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Fine Balance', 'A Painted House', 'A Patchwork Planet',
       'A Tangled Web', 'A Widow for One Year',
       ...
       'The Tale of the Body Thief (Vampire Chronicles (Paperback))',
       'The Web (Alex Delaware Series, No. 10)', 'Thursday'S At Eight',
       'Time Bomb (Alex Delaware Novels (Paperback))', 'Turtle Moon',
       'Undead and Unwed (Berkley Sensation)', 'Up Island: A Novel',
       'Welcome to the World, Baby Girl!',
       'What Looks Like Crazy On An Ordinary Day', 'White Mountain'],
      dtype='object', name='Book-Title', length=128)

In [104]:
final_ratings = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]

In [126]:
final_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
65,278418,038550120X,0,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
70,11676,038550120X,10,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
85,23768,038550120X,0,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
128,98741,038550120X,0,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
178,162639,038550120X,0,A Painted House,JOHN GRISHAM,2001,Doubleday,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...,http://images.amazon.com/images/P/038550120X.0...
...,...,...,...,...,...,...,...,...,...,...
146817,177458,039914823X,0,How to Be Good,Nick Hornby,2001,Putnam Adult,http://images.amazon.com/images/P/039914823X.0...,http://images.amazon.com/images/P/039914823X.0...,http://images.amazon.com/images/P/039914823X.0...
154956,102967,051511992x,0,That Camden Summer,Lavyrle Spencer,1997,Jove Books,http://images.amazon.com/images/P/051511992X.0...,http://images.amazon.com/images/P/051511992X.0...,http://images.amazon.com/images/P/051511992X.0...
169661,148744,1551663163,0,Montana,Debbie Macomber,1997,Mira Books,http://images.amazon.com/images/P/1551663163.0...,http://images.amazon.com/images/P/1551663163.0...,http://images.amazon.com/images/P/1551663163.0...
177299,177458,1570429898,0,The First Counsel,Brad Meltzer,2001,Time Warner Audio Major,http://images.amazon.com/images/P/1570429898.0...,http://images.amazon.com/images/P/1570429898.0...,http://images.amazon.com/images/P/1570429898.0...


In [106]:
pt = final_ratings.pivot_table(index='Book-Title',columns='User-ID',values='Book-Rating')

In [107]:
pt.fillna(0,inplace=True)

In [108]:
pt

User-ID,11601,11676,16795,23768,23902,26544,35859,36606,36836,52584,...,232131,234623,235105,238781,242824,245963,261105,266226,275970,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204 Rosewood Lane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
311 Pelican Court,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"A Child Called \It\"": One Child's Courage to Survive""",0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Undead and Unwed (Berkley Sensation),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Up Island: A Novel,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Welcome to the World, Baby Girl!",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
What Looks Like Crazy On An Ordinary Day,0.0,10.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [109]:
from sklearn.metrics.pairwise import cosine_similarity

In [110]:
similarity_scores = cosine_similarity(pt)

In [None]:
similarity_scores.shape

(706, 706)

In [111]:
def recommend(book_name):
    # index fetch
    index = np.where(pt.index==book_name)[0][0]
    similar_items = sorted(list(enumerate(similarity_scores[index])),key=lambda x:x[1],reverse=True)[1:5]
    
    data = []
    for i in similar_items:
        item = []
        temp_df = books[books['Book-Title'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Image-URL-M'].values))
        
        data.append(item)
    
    return data

In [129]:
recommend("Clara Callan")

IndexError: index 0 is out of bounds for axis 0 with size 0

In [130]:
pt.index[0]

'16 Lighthouse Road'

In [None]:
import pickle
pickle.dump(popular_df,open('popular.pkl','wb'))

In [127]:
books.drop_duplicates('Book-Title')

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271354,449906736,Flashpoints: Promise and Peril in a New World,Robin Wright,1993,Ballantine Books,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...
271356,525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [None]:
pickle.dump(pt,open('pt.pkl','wb'))
pickle.dump(books,open('books.pkl','wb'))
pickle.dump(similarity_scores,open('similarity_scores.pkl','wb'))