## Books Recommender System

### Importing libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the three raw tables (books, ratings, users) from CSV files in the working directory.
# The saved output of this cell shows a pandas warning pointing at the Books.csv read;
# low_memory=False parses the file in one pass so every column gets a single,
# consistently inferred dtype instead of per-chunk guesses.
books_df = pd.read_csv('Books.csv', low_memory=False)
rating_df = pd.read_csv('Ratings.csv')
user_df = pd.read_csv('Users.csv')

  books_df = pd.read_csv('Books.csv')


In [3]:
# Preview two randomly drawn rows of the books table (no random_state, so rows differ per run).
books_df.sample(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
263431,553180932,Garden of Rama,Arthur C. Clarke,1992,Spectra Books,http://images.amazon.com/images/P/0553180932.0...,http://images.amazon.com/images/P/0553180932.0...,http://images.amazon.com/images/P/0553180932.0...
79697,446607703,30 Seconds,Sam,1999,Warner Books,http://images.amazon.com/images/P/0446607703.0...,http://images.amazon.com/images/P/0446607703.0...,http://images.amazon.com/images/P/0446607703.0...


In [4]:
# Preview two randomly drawn rows of the users table (no random_state, so rows differ per run).
user_df.sample(2)

Unnamed: 0,User-ID,Location,Age
249361,249362,"london, n/a, united kingdom",25.0
102321,102322,"selm, nrw, germany",54.0


In [5]:
# Preview two randomly drawn rows of the ratings table (no random_state, so rows differ per run).
rating_df.sample(2)

Unnamed: 0,User-ID,ISBN,Book-Rating
930220,225910,679450432,8
874944,211919,60199652,0


### Checking for null values

In [6]:
# Per-column count of missing values for each table, separated by a blank line.
print(
    books_df.isna().sum(),
    user_df.isna().sum(),
    rating_df.isna().sum(),
    sep='\n\n',
)

ISBN                   0
Book-Title             0
Book-Author            1
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

User-ID          0
Location         0
Age         110762
dtype: int64

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64


## Checking for Duplicates

In [7]:
# Number of rows in the books table that exactly repeat an earlier row.
books_df.duplicated().sum()

0

In [8]:
# Number of rows in the ratings table that exactly repeat an earlier row.
rating_df.duplicated().sum()

0

In [9]:
# Number of rows in the users table that exactly repeat an earlier row.
user_df.duplicated().sum()

0

## Popularity based Recommender System

1. Books with at least 200 ratings

In [10]:
# Ratings joined with book details: inner join of the two tables on their shared ISBN column.
rwn = pd.merge(rating_df, books_df, on='ISBN')

In [11]:
# Preview five randomly drawn rows of the merged ratings/books frame (no random_state set).
rwn.sample(5)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
469650,160735,440236819,0,The Cottage,Danielle Steel,2003,Dell Publishing Company,http://images.amazon.com/images/P/0440236819.0...,http://images.amazon.com/images/P/0440236819.0...,http://images.amazon.com/images/P/0440236819.0...
109195,261603,449227545,0,The Hammer of Eden: A Novel,Ken Follett,1999,Fawcett Books,http://images.amazon.com/images/P/0449227545.0...,http://images.amazon.com/images/P/0449227545.0...,http://images.amazon.com/images/P/0449227545.0...
403787,76626,451155580,10,Once on a Time,A.A. Milne,1988,New Amer Library (Mm),http://images.amazon.com/images/P/0451155580.0...,http://images.amazon.com/images/P/0451155580.0...,http://images.amazon.com/images/P/0451155580.0...
48747,268282,345334019,0,Escape the Night,Richard North Patterson,1995,Ballantine Books,http://images.amazon.com/images/P/0345334019.0...,http://images.amazon.com/images/P/0345334019.0...,http://images.amazon.com/images/P/0345334019.0...
506388,115572,64471055,8,Prince Caspian (rack) : The Return to Narnia (...,C. S. Lewis,1994,HarperCollins,http://images.amazon.com/images/P/0064471055.0...,http://images.amazon.com/images/P/0064471055.0...,http://images.amazon.com/images/P/0064471055.0...


In [12]:
# Number of rating rows recorded per book title, exposed as column 'num_rating'.
count_rating = (
    rwn.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_rating')
)
count_rating

Unnamed: 0,Book-Title,num_rating
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241066,Ã?Â?lpiraten.,2
241067,Ã?Â?rger mit Produkt X. Roman.,4
241068,Ã?Â?sterlich leben.,1
241069,Ã?Â?stlich der Berge.,3


In [13]:
# Mean rating per book title, rounded to two decimals.
# The rating column is selected *before* aggregating: calling .mean() on the whole
# grouped frame would also try to average the string columns (author, publisher,
# URLs, ...), which raises TypeError on pandas >= 2.0 and is wasted work otherwise.
# NOTE(review): ratings of 0 are included in the mean — confirm whether 0 denotes a
# real score or "no explicit rating" in this dataset.
avg_rating = (
    rwn.groupby('Book-Title')['Book-Rating']
    .mean()
    .round(2)
    .reset_index()
)
avg_rating

Unnamed: 0,Book-Title,Book-Rating
0,A Light in the Storm: The Civil War Diary of ...,2.25
1,Always Have Popsicles,0.00
2,Apple Magic (The Collector's series),0.00
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.00
4,Beyond IBM: Leadership Marketing and Finance ...,0.00
...,...,...
241066,Ã?Â?lpiraten.,0.00
241067,Ã?Â?rger mit Produkt X. Roman.,5.25
241068,Ã?Â?sterlich leben.,7.00
241069,Ã?Â?stlich der Berge.,2.67


In [14]:
# Combine the per-title average rating and the per-title rating count into one frame.
popular_df = pd.merge(avg_rating, count_rating, on='Book-Title')
popular_df

Unnamed: 0,Book-Title,Book-Rating,num_rating
0,A Light in the Storm: The Civil War Diary of ...,2.25,4
1,Always Have Popsicles,0.00,1
2,Apple Magic (The Collector's series),0.00,1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.00,1
4,Beyond IBM: Leadership Marketing and Finance ...,0.00,1
...,...,...,...
241066,Ã?Â?lpiraten.,0.00,2
241067,Ã?Â?rger mit Produkt X. Roman.,5.25,4
241068,Ã?Â?sterlich leben.,7.00,1
241069,Ã?Â?stlich der Berge.,2.67,3


In [15]:
# Show only the title and author columns of the books table.
books_df.loc[:, ['Book-Title', 'Book-Author']]

Unnamed: 0,Book-Title,Book-Author
0,Classical Mythology,Mark P. O. Morford
1,Clara Callan,Richard Bruce Wright
2,Decision in Normandy,Carlo D'Este
3,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,The Mummies of Urumchi,E. J. W. Barber
...,...,...
271355,There's a Bat in Bunk Five,Paula Danziger
271356,From One to One Hundred,Teri Sloat
271357,Lily Dale : The True Story of the Town that Ta...,Christine Wicker
271358,Republic (World's Classics),Plato


In [16]:
# Keep titles with more than 100 ratings, ordered from highest to lowest average rating.
popular_df = (
    popular_df.loc[popular_df['num_rating'].gt(100)]
    .sort_values('Book-Rating', ascending=False)
)
popular_df

Unnamed: 0,Book-Title,Book-Rating,num_rating
80434,Harry Potter and the Prisoner of Azkaban (Book 3),5.85,428
195672,The Little Prince,5.82,141
80422,Harry Potter and the Goblet of Fire (Book 4),5.82,387
80441,Harry Potter and the Sorcerer's Stone (Book 1),5.74,278
80426,Harry Potter and the Order of the Phoenix (Boo...,5.50,347
...,...,...,...
141276,Pleading Guilty,1.46,136
220317,Tom Clancy's Op-Center (Tom Clancy's Op Center...,1.44,134
233567,"Whirlwind (Tyler, Book 1)",1.34,107
205836,The Sands of Time,1.17,105


In [17]:
# Attach author and cover URL from the books table, keep the first matching
# books_df row per title, and narrow the frame to the columns shown downstream.
# NOTE(review): this cell rebinds popular_df from itself, so re-running it without
# re-running the cells above it will not reproduce the same frame.
popular_df = (
    pd.merge(popular_df, books_df, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    .loc[:, ['Book-Title', 'Book-Author', 'Book-Rating', 'num_rating', 'Image-URL-M']]
)
popular_df

Unnamed: 0,Book-Title,Book-Author,Book-Rating,num_rating,Image-URL-M
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,5.85,428,http://images.amazon.com/images/P/0439136350.0...
3,The Little Prince,Antoine de Saint-ExupÃ©ry,5.82,141,http://images.amazon.com/images/P/0156528207.0...
8,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,5.82,387,http://images.amazon.com/images/P/0439139597.0...
10,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,5.74,278,http://images.amazon.com/images/P/0590353403.0...
14,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,5.50,347,http://images.amazon.com/images/P/043935806X.0...
...,...,...,...,...,...
2971,Pleading Guilty,Scott Turow,1.46,136,http://images.amazon.com/images/P/0446365505.0...
2973,Tom Clancy's Op-Center (Tom Clancy's Op Center...,Tom Clancy,1.44,134,http://images.amazon.com/images/P/0425147363.0...
2974,"Whirlwind (Tyler, Book 1)",Nancy Martin,1.34,107,http://images.amazon.com/images/P/0373825013.0...
2975,The Sands of Time,Sidney Sheldon,1.17,105,http://images.amazon.com/images/P/0446356832.0...


In [18]:
# Fifty best-rated titles among those with more than 200 ratings.
# NOTE(review): the section heading mentions "at least 200" ratings while this filter
# keeps strictly more than 200 — confirm which one is intended.
Top_50 = (
    popular_df.loc[popular_df['num_rating'].gt(200)]
    .sort_values('Book-Rating', ascending=False)
    .iloc[:50]
)
Top_50

Unnamed: 0,Book-Title,Book-Author,Book-Rating,num_rating,Image-URL-M
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,5.85,428,http://images.amazon.com/images/P/0439136350.0...
8,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,5.82,387,http://images.amazon.com/images/P/0439139597.0...
10,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,5.74,278,http://images.amazon.com/images/P/0590353403.0...
14,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,5.5,347,http://images.amazon.com/images/P/043935806X.0...
18,Ender's Game (Ender Wiggins Saga (Paperback)),Orson Scott Card,5.41,249,http://images.amazon.com/images/P/0312853238.0...
22,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,5.18,556,http://images.amazon.com/images/P/0439064872.0...
27,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,5.01,281,http://images.amazon.com/images/P/0345339681.0...
28,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,4.95,368,http://images.amazon.com/images/P/0345339703.0...
40,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,4.9,575,http://images.amazon.com/images/P/059035342X.0...
45,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,4.88,260,http://images.amazon.com/images/P/0345339711.0...


In [22]:
# Top_50 is sliced from popular_df, which already carries 'Book-Author' and
# 'Image-URL-M'. Merging it with books_df on 'Book-Title' again would suffix those
# overlapping columns (_x / _y), and the column selection below would then raise
# KeyError on a clean top-to-bottom run. The author/cover details are already
# present, so this cell only de-duplicates by title and fixes the column order.
Top_50 = Top_50.drop_duplicates('Book-Title')[['Book-Title','Book-Author', 'Book-Rating', 'num_rating', 'Image-URL-M']]
# Spot-check two random rows of the result.
Top_50.sample(2)

Unnamed: 0,Book-Title,Book-Author,Book-Rating,num_rating,Image-URL-M
57,The Catcher in the Rye,J.D. Salinger,4.55,449,http://images.amazon.com/images/P/0316769487.0...
150,Girl with a Pearl Earring,Tracy Chevalier,4.22,526,http://images.amazon.com/images/P/0452282152.0...


In [23]:
# List the column labels currently held by Top_50.
Top_50.columns

Index(['Book-Title', 'Book-Author', 'Book-Rating', 'num_rating',
       'Image-URL-M'],
      dtype='object')

In [24]:
# Pin Top_50 to exactly these five columns, in this order.
Top_50 = Top_50.loc[
    :, ['Book-Title', 'Book-Author', 'Book-Rating', 'num_rating', 'Image-URL-M']
]

## Collaborative Filtering Based Recommender System

In [25]:
# Display the merged ratings/books frame (the notebook truncates the rendered rows).
rwn

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
...,...,...,...,...,...,...,...,...,...,...
1031131,276688,0517145553,0,Mostly Harmless,Douglas Adams,1995,Random House Value Pub,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...,http://images.amazon.com/images/P/0517145553.0...
1031132,276688,1575660792,7,Gray Matter,Shirley Kennett,1996,Kensington Publishing Corporation,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...,http://images.amazon.com/images/P/1575660792.0...
1031133,276690,0590907301,0,Triplet Trouble and the Class Trip (Triplet Tr...,Debbie Dadey,1997,Apple,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...,http://images.amazon.com/images/P/0590907301.0...
1031134,276704,0679752714,0,A Desert of Pure Feeling (Vintage Contemporaries),Judith Freeman,1997,Vintage Books USA,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...,http://images.amazon.com/images/P/0679752714.0...


In [26]:
# IDs of users who have more than 200 rating rows in the merged frame.
ratings_per_user = rwn.groupby('User-ID')['Book-Rating'].count()
user_rated = ratings_per_user[ratings_per_user > 200].index

In [27]:
# Restrict the merged frame to rows written by the heavy raters selected above.
filtered_rating = rwn.loc[rwn['User-ID'].isin(user_rated)]

In [28]:
# Titles that received at least 50 ratings from the retained users.
ratings_per_title = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
famous_books = ratings_per_title[ratings_per_title >= 50].index

In [29]:
# Ratings restricted to both the retained users and the retained titles.
final_ratings = filtered_rating.loc[filtered_rating['Book-Title'].isin(famous_books)]
final_ratings.sample(n=2)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
265605,31315,399147284,10,Chosen Prey,John Sandford,2001,G. P. Putnam's Sons,http://images.amazon.com/images/P/0399147284.0...,http://images.amazon.com/images/P/0399147284.0...,http://images.amazon.com/images/P/0399147284.0...
202837,225810,312278586,0,The Nanny Diaries: A Novel,Emma McLaughlin,2002,St. Martin's Press,http://images.amazon.com/images/P/0312278586.0...,http://images.amazon.com/images/P/0312278586.0...,http://images.amazon.com/images/P/0312278586.0...


In [30]:
# Book-by-user matrix: one row per title, one column per user, cell = that user's
# rating (pivot_table's default aggregation averages repeated title/user pairs).
pt = pd.pivot_table(
    final_ratings,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)

In [31]:
# Title/user pairs with no rating become 0 so the matrix is fully numeric.
pt = pt.fillna(0)

In [32]:
# Display the book-by-user rating matrix.
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the rows of pt: similar[i][j] compares book i with book j.
similar = cosine_similarity(pt)

In [34]:
# Display the book-to-book similarity matrix.
similar

array([[1.        , 0.10255025, 0.01220856, ..., 0.12110367, 0.07347567,
        0.04316046],
       [0.10255025, 1.        , 0.2364573 , ..., 0.07446129, 0.16773875,
        0.14263397],
       [0.01220856, 0.2364573 , 1.        , ..., 0.04558758, 0.04938579,
        0.10796119],
       ...,
       [0.12110367, 0.07446129, 0.04558758, ..., 1.        , 0.07085128,
        0.0196177 ],
       [0.07347567, 0.16773875, 0.04938579, ..., 0.07085128, 1.        ,
        0.10602962],
       [0.04316046, 0.14263397, 0.10796119, ..., 0.0196177 , 0.10602962,
        1.        ]])

In [35]:
# (position, score) pairs for the first book's row, best score first; the slice
# drops the leading entry and keeps the next five.
sorted(enumerate(similar[0]), key=lambda pair: pair[1], reverse=True)[1:6]

[(47, 0.2702651417103732),
 (545, 0.2639619371123497),
 (82, 0.23669374347400993),
 (634, 0.23299389358170394),
 (551, 0.2262639743141286)]

In [63]:
def recommend(book_name, top_n=5):
    """Return the books most similar to ``book_name``.

    Relies on three notebook-level objects: ``pt`` (rating matrix whose index holds
    the book titles), ``similar`` (square similarity matrix aligned with ``pt``'s
    rows) and ``books_df`` (book metadata with 'Book-Title', 'Book-Author' and
    'Image-URL-M' columns).

    Parameters
    ----------
    book_name : str
        Title to look up; must match an entry of ``pt.index`` exactly.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list[list]
        One ``[title, author, image_url]`` list per recommended book, ordered from
        most to least similar. An entry is empty if the title has no row in
        ``books_df``.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # Same exception type the bare [0][0] lookup used to raise, with a usable message.
        raise IndexError(f"book {book_name!r} is not in the similarity index")
    index = matches[0]

    scores = similar[index]
    # Exclude the queried book by position instead of assuming it sorts first:
    # on ties (identical rating vectors) or an all-zero row it may not be at
    # position 0, and a positional [1:] slice would then recommend the book itself.
    candidates = [i for i in range(len(scores)) if i != index]
    best = sorted(candidates, key=lambda i: scores[i], reverse=True)[:top_n]

    data = []
    for i in best:
        title = pt.index[i]
        # Several editions can share a title; keep the first matching row only.
        first_match = books_df[books_df['Book-Title'] == title].drop_duplicates('Book-Title')

        item = []
        for column in ('Book-Title', 'Book-Author', 'Image-URL-M'):
            item.extend(first_match[column].tolist())
        data.append(item)
    return data
        
        

In [65]:
# Example call: recommendations for the title "1984".
recommend("1984")

[['Animal Farm',
  'George Orwell',
  'http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg'],
 ["The Handmaid's Tale",
  'Margaret Atwood',
  'http://images.amazon.com/images/P/0449212602.01.MZZZZZZZ.jpg'],
 ['Brave New World',
  'Aldous Huxley',
  'http://images.amazon.com/images/P/0060809833.01.MZZZZZZZ.jpg'],
 ['The Vampire Lestat (Vampire Chronicles, Book II)',
  'ANNE RICE',
  'http://images.amazon.com/images/P/0345313860.01.MZZZZZZZ.jpg'],
 ['The Hours : A Novel',
  'Michael Cunningham',
  'http://images.amazon.com/images/P/0312243022.01.MZZZZZZZ.jpg']]

In [38]:
# Display the popularity table once more before exporting artifacts.
popular_df

Unnamed: 0,Book-Title,Book-Author,Book-Rating,num_rating,Image-URL-M
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,5.85,428,http://images.amazon.com/images/P/0439136350.0...
3,The Little Prince,Antoine de Saint-ExupÃ©ry,5.82,141,http://images.amazon.com/images/P/0156528207.0...
8,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,5.82,387,http://images.amazon.com/images/P/0439139597.0...
10,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,5.74,278,http://images.amazon.com/images/P/0590353403.0...
14,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,5.50,347,http://images.amazon.com/images/P/043935806X.0...
...,...,...,...,...,...
2971,Pleading Guilty,Scott Turow,1.46,136,http://images.amazon.com/images/P/0446365505.0...
2973,Tom Clancy's Op-Center (Tom Clancy's Op Center...,Tom Clancy,1.44,134,http://images.amazon.com/images/P/0425147363.0...
2974,"Whirlwind (Tyler, Book 1)",Nancy Martin,1.34,107,http://images.amazon.com/images/P/0373825013.0...
2975,The Sands of Time,Sidney Sheldon,1.17,105,http://images.amazon.com/images/P/0446356832.0...


In [40]:
# (rows, columns) of the Top_50 table.
Top_50.shape

(50, 5)

In [42]:
import pickle
#pickle.dump(popular_df, open('popular.pkl','wb'))

In [43]:
# Serialize the Top_50 table to disk; the context manager guarantees the file
# handle is flushed and closed even if pickling fails.
with open('Top_50.pkl', 'wb') as fh:
    pickle.dump(Top_50, fh)

In [66]:
# Serialize the books table to disk; the context manager guarantees the file
# handle is flushed and closed even if pickling fails.
with open('book_df.pkl', 'wb') as fh:
    pickle.dump(books_df, fh)

In [67]:
# Serialize the book-by-user rating matrix to disk; the context manager guarantees
# the file handle is flushed and closed even if pickling fails.
with open('pt.pkl', 'wb') as fh:
    pickle.dump(pt, fh)

In [68]:
# Serialize the similarity matrix to disk; the context manager guarantees the file
# handle is flushed and closed even if pickling fails.
with open('similar.pkl', 'wb') as fh:
    pickle.dump(similar, fh)