# Book Recommendation System 

In [176]:
import os,sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

#### Books

In [177]:
# Folder holding the CSV exports. Set the BOOKS_DATA_DIR environment variable to
# point somewhere else; the default is the location originally used here.
data_dir = os.environ.get('BOOKS_DATA_DIR', 'C:\\Users\\Swarn\\Downloads')
books = pd.read_csv(os.path.join(data_dir, 'Books (1).csv'))
print(books.shape)
books.head(1)

(271360, 8)


Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...


In [178]:
# Percentage of missing values in each column of the books table.
books.isna().sum().div(len(books)).mul(100)

ISBN                   0.000000
Book-Title             0.000000
Book-Author            0.000369
Year-Of-Publication    0.000000
Publisher              0.000737
Image-URL-S            0.000000
Image-URL-M            0.000000
Image-URL-L            0.001106
dtype: float64

In [179]:
# Number of fully duplicated rows in the books table.
books.duplicated().sum()

0

#### Ratings

In [180]:
# Folder holding the CSV exports. Set the BOOKS_DATA_DIR environment variable to
# point somewhere else; the default is the location originally used here.
data_dir = os.environ.get('BOOKS_DATA_DIR', "C:\\Users\\Swarn\\Downloads")
rating = pd.read_csv(os.path.join(data_dir, "Ratings (1).csv"))
print(rating.shape)
rating.head(1)

(1149780, 3)


Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0


In [181]:
# Count of missing values per column of the ratings table.
rating.isnull().sum()

User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [182]:
# Number of fully duplicated rows in the ratings table.
rating.duplicated().sum()

0

### User

In [183]:
# Folder holding the CSV exports. Set the BOOKS_DATA_DIR environment variable to
# point somewhere else; the default is the location originally used here.
data_dir = os.environ.get('BOOKS_DATA_DIR', "C:\\Users\\Swarn\\Downloads")
user = pd.read_csv(os.path.join(data_dir, "Users (1).csv"))
print(user.shape)
user.head(1)

(278858, 3)


Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",


In [184]:
# Count of missing values per column of the users table.
user.isnull().sum()

User-ID          0
Location         0
Age         110762
dtype: int64

In [185]:
# Number of fully duplicated rows in the users table.
user.duplicated().sum()

0

Age will not influence the recommendations: both recommenders built below use only the ratings, not user demographics. We therefore drop the column.

In [186]:
# Reassign instead of mutating in place, and tolerate an already-missing column,
# so that re-running this cell does not raise a KeyError.
user = user.drop(columns=['Age'], errors='ignore')

In [187]:
# Check the users table after dropping Age: print its shape and show the first row.
print(user.shape)
user.head(1)

(278858, 2)


Unnamed: 0,User-ID,Location
0,1,"nyc, new york, usa"


### Merging all the Files

In [188]:
# Attach the book metadata to every rating, joining on ISBN.
# merge() defaults to an inner join, so ratings whose ISBN does not appear in
# the books table are dropped here.
data = pd.merge(rating, books, on="ISBN")
data.head(1)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [189]:
# Build the full table from the three source frames rather than from `data`
# itself: merging `user` into an already-merged `data` (e.g. when this cell is
# re-run) would add the Location column a second time as Location_x/Location_y.
data = user.merge(rating.merge(books, on="ISBN"), on='User-ID')
data.head(1)

Unnamed: 0,User-ID,Location,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,2,"stockton, california, usa",195153448,0,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...


## Popularity Based Recommendation System

In [190]:
# Frequency of each rating value across all merged rows.
# NOTE(review): 0 is by far the most common value; it may mean "no explicit
# rating" rather than a real score of zero - confirm against the dataset
# documentation, because these zeros are included in the averages computed later.
data['Book-Rating'].value_counts()

0     647294
8      91804
10     71225
7      66402
9      60778
5      45355
6      31687
4       7617
3       5118
2       2375
1       1481
Name: Book-Rating, dtype: int64

In [191]:
# Show the first merged row again as a reminder of the available columns.
data.head(1)

Unnamed: 0,User-ID,Location,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,2,"stockton, california, usa",195153448,0,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...


In [192]:
# How many ratings each book title received, as a two-column frame
# (Book-Title, Count of Ratings).
rating_count = (
    data.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='Count of Ratings')
)
rating_count.head()

Unnamed: 0,Book-Title,Count of Ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [193]:
# Keep only the title and its rating; this is all the per-title average needs.
rating_wise = data.loc[:, ["Book-Title", "Book-Rating"]]
rating_wise.shape

(1031136, 2)

In [194]:
# Mean rating per title, indexed by Book-Title, with the column renamed to
# say what it now holds.
rat_mean = (
    rating_wise
    .groupby('Book-Title')
    .mean()
    .rename(columns={'Book-Rating': 'Average-Ratings'})
)
rat_mean.head()

Unnamed: 0_level_0,Average-Ratings
Book-Title,Unnamed: 1_level_1
"A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)",2.25
Always Have Popsicles,0.0
Apple Magic (The Collector's series),0.0
"Ask Lily (Young Women of Faith: Lily Series, Book 5)",8.0
Beyond IBM: Leadership Marketing and Finance for the 1990s,0.0


### Merging the rating-count and rating-mean tables

In [195]:
# One row per title carrying both its rating count and its average rating.
popular_books = pd.merge(rating_count, rat_mean, on='Book-Title')
popular_books.head()

Unnamed: 0,Book-Title,Count of Ratings,Average-Ratings
0,A Light in the Storm: The Civil War Diary of ...,4,2.25
1,Always Have Popsicles,1,0.0
2,Apple Magic (The Collector's series),1,0.0
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.0
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.0


In [196]:
# Distinct per-title rating counts (presumably inspected to choose the
# minimum-count cutoff applied in the next cell).
popular_books['Count of Ratings'].unique()

array([   4,    1,    2,   10,    9,   11,   17,    3,    5,   47,    8,
          6,   12,   22,    7,   61,   13,   23,   37,   28,   65,   14,
         20,  284,  509,   24,   25,   90,   21,   71,   57,  106,   30,
        356,   85,  151,   16,   50,   31,  125,   41,   74,   44,   46,
         32,   18,  136,   15,  346,   35,   29,   26,   38,  236,   27,
        265,  156,   33,   52,   83,   67,   81,  139,   92,   54,  108,
         39,  140,  116,   40,   42,   34,   48,  169,  104,   99,  302,
        285,   86,  192,  109,   59,   75,  128,  174,  327,   19,   49,
         36,   51,   66,  838,   68,  149,   64,   82,   63,   76,  413,
         60,   56,  118,   78,   45,   95,   62,   43,   97,  135,  210,
        549,   96,  101,  164,  205,  378,  273,   53,  158,  150,   70,
         69,  279,  262,  121,  126,   77,  141,  332,  112,   84,  187,
         58,  258,   55,  184,  147,  304,  110,   93,  114,   91,   72,
         94,   80,  326,  222,  170,  670,  167,  2

In [197]:
# Keep titles with more than 50 ratings and rank them from the highest average
# rating to the lowest.
popular_books = (
    popular_books[popular_books['Count of Ratings'].gt(50)]
    .sort_values("Average-Ratings", ascending=False)
)

In [198]:
# Top of the ranking after filtering and sorting.
popular_books.head()

Unnamed: 0,Book-Title,Count of Ratings,Average-Ratings
70396,Free,56,8.017857
208575,The Stand (The Complete and Uncut Edition),57,6.175439
77812,Griffin &amp; Sabine: An Extraordinary Corresp...,72,6.041667
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289


In [199]:
# Number of titles that passed the rating-count filter.
popular_books.shape

(2381, 3)

In [215]:
# Bring in the cover-image URLs. Merging on the title yields one row per
# matching books entry, so keep only the first row for each title.
popular_books_final = (
    popular_books
    .merge(books, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    [['Book-Title', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L',
      'Count of Ratings', 'Average-Ratings']]
)
# The duplicate count used to be computed and thrown away; check it for real.
assert popular_books_final['Book-Title'].is_unique
print(popular_books_final.shape)
popular_books_final

(2381, 6)


Unnamed: 0,Book-Title,Image-URL-S,Image-URL-M,Image-URL-L,Count of Ratings,Average-Ratings
0,Free,http://images.amazon.com/images/P/1844262553.0...,http://images.amazon.com/images/P/1844262553.0...,http://images.amazon.com/images/P/1844262553.0...,56,8.017857
3,The Stand (The Complete and Uncut Edition),http://images.amazon.com/images/P/0385199570.0...,http://images.amazon.com/images/P/0385199570.0...,http://images.amazon.com/images/P/0385199570.0...,57,6.175439
4,Griffin &amp; Sabine: An Extraordinary Corresp...,http://images.amazon.com/images/P/0877017883.0...,http://images.amazon.com/images/P/0877017883.0...,http://images.amazon.com/images/P/0877017883.0...,72,6.041667
5,Harry Potter and the Prisoner of Azkaban (Book 3),http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
8,Harry Potter and the Goblet of Fire (Book 4),http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
...,...,...,...,...,...,...
6942,"Night Moves (Tom Clancy's Net Force, No. 3)",http://images.amazon.com/images/P/042517400X.0...,http://images.amazon.com/images/P/042517400X.0...,http://images.amazon.com/images/P/042517400X.0...,72,1.138889
6945,Wild Animus,http://images.amazon.com/images/P/0971880107.0...,http://images.amazon.com/images/P/0971880107.0...,http://images.amazon.com/images/P/0971880107.0...,2502,1.019584
6946,Whirlwind,http://images.amazon.com/images/P/0688066631.0...,http://images.amazon.com/images/P/0688066631.0...,http://images.amazon.com/images/P/0688066631.0...,61,1.000000
6948,Ssn,http://images.amazon.com/images/P/0425173534.0...,http://images.amazon.com/images/P/0425173534.0...,http://images.amazon.com/images/P/0425173534.0...,67,0.970149


# TOP 15 -  most Popular Books

In [213]:
# First 15 rows of the ranking, i.e. the titles with the highest average rating.
popular_books_final.head(15)

Unnamed: 0,Book-Title,Image-URL-S,Image-URL-M,Image-URL-L,Count of Ratings,Average-Ratings
0,Free,http://images.amazon.com/images/P/1844262553.0...,http://images.amazon.com/images/P/1844262553.0...,http://images.amazon.com/images/P/1844262553.0...,56,8.017857
3,The Stand (The Complete and Uncut Edition),http://images.amazon.com/images/P/0385199570.0...,http://images.amazon.com/images/P/0385199570.0...,http://images.amazon.com/images/P/0385199570.0...,57,6.175439
4,Griffin &amp; Sabine: An Extraordinary Corresp...,http://images.amazon.com/images/P/0877017883.0...,http://images.amazon.com/images/P/0877017883.0...,http://images.amazon.com/images/P/0877017883.0...,72,6.041667
5,Harry Potter and the Prisoner of Azkaban (Book 3),http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
8,Harry Potter and the Goblet of Fire (Book 4),http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
10,The Little Prince,http://images.amazon.com/images/P/0156528207.0...,http://images.amazon.com/images/P/0156528207.0...,http://images.amazon.com/images/P/0156528207.0...,141,5.815603
15,The Cat in the Hat,http://images.amazon.com/images/P/039480001X.0...,http://images.amazon.com/images/P/039480001X.0...,http://images.amazon.com/images/P/039480001X.0...,53,5.754717
17,Harry Potter and the Sorcerer's Stone (Book 1),http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,http://images.amazon.com/images/P/0590353403.0...,278,5.73741
21,The Hobbit,http://images.amazon.com/images/P/0345272579.0...,http://images.amazon.com/images/P/0345272579.0...,http://images.amazon.com/images/P/0345272579.0...,80,5.7
33,Harry Potter and the Order of the Phoenix (Boo...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,347,5.501441


# Collaborative Filtering Based Recommendation System

We need the data in a format where users are the columns, books are the rows, and the ratings are the values. We will only consider users who are genuinely active readers, i.e. who have read and rated more than 200 books.

In [234]:
# Size of the merged table that the filters below start from.
# (A preceding `data.head(1)` was removed: only the last expression of a cell
# is displayed, so it had no effect.)
data.shape

(1031136, 11)

1. Get the data from the user perspective -- only prime readers who have rated more than 200 books.
2. Get the data from the book-rating perspective -- only books that have been rated at least 50 times.

Users who have rated only once or twice can't be considered very trustworthy.
Similarly, books that have received only one or two ratings may not be very popular or useful.

#### Filtering data from User Perspective

In [231]:
# Boolean Series indexed by User-ID: True where the user has more than 200 ratings.
df = data.groupby('User-ID')['Book-Rating'].count() > 200
# User-IDs for which the flag is True.
prime_users = df.loc[df].index

In [235]:
# Restrict the merged table to rows written by the prime users.
filtered_df = data.loc[data['User-ID'].isin(prime_users)]
filtered_df.head(1)

Unnamed: 0,User-ID,Location,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
311,254,"minneapolis, minnesota, usa",451167317,8,The Dark Half,Stephen King,1994,Signet Book,http://images.amazon.com/images/P/0451167317.0...,http://images.amazon.com/images/P/0451167317.0...,http://images.amazon.com/images/P/0451167317.0...


##### Filtering using books rated at least 50 times (by the prime users)

In [249]:
# Boolean Series indexed by Book-Title: True where the prime users gave the
# title at least 50 ratings.
y = filtered_df.groupby("Book-Title")['Book-Rating'].count() >= 50
# Titles for which the flag is True.
famous_books = y.loc[y].index
famous_books

Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',
       'A Bend in the Road', 'A Case of Need',
       'A Child Called \It\": One Child's Courage to Survive"',
       'A Civil Action', 'A Day Late and a Dollar Short', 'A Fine Balance',
       ...
       'Winter Solstice', 'Wish You Well', 'Without Remorse',
       'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',
       'Year of Wonders', 'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='Book-Title', length=706)

In [250]:
# Rows that satisfy both filters: written by a prime user, about a famous book.
final_rating = filtered_df.loc[filtered_df['Book-Title'].isin(famous_books)]
final_rating.shape

(58586, 11)

In [251]:
# drop_duplicates() returns a new frame; assign it back so the de-duplication
# actually takes effect (previously the result was only displayed and discarded).
final_rating = final_rating.drop_duplicates()
final_rating

Unnamed: 0,User-ID,Location,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
311,254,"minneapolis, minnesota, usa",0451167317,8,The Dark Half,Stephen King,1994,Signet Book,http://images.amazon.com/images/P/0451167317.0...,http://images.amazon.com/images/P/0451167317.0...,http://images.amazon.com/images/P/0451167317.0...
312,254,"minneapolis, minnesota, usa",043935806X,0,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...,http://images.amazon.com/images/P/043935806X.0...
313,254,"minneapolis, minnesota, usa",0804106304,0,The Joy Luck Club,Amy Tan,1994,Prentice Hall (K-12),http://images.amazon.com/images/P/0804106304.0...,http://images.amazon.com/images/P/0804106304.0...,http://images.amazon.com/images/P/0804106304.0...
314,254,"minneapolis, minnesota, usa",0439064864,9,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic,http://images.amazon.com/images/P/0439064864.0...,http://images.amazon.com/images/P/0439064864.0...,http://images.amazon.com/images/P/0439064864.0...
316,254,"minneapolis, minnesota, usa",0671021001,7,She's Come Undone (Oprah's Book Club),Wally Lamb,1998,Pocket,http://images.amazon.com/images/P/0671021001.0...,http://images.amazon.com/images/P/0671021001.0...,http://images.amazon.com/images/P/0671021001.0...
...,...,...,...,...,...,...,...,...,...,...,...
1029595,278418,"omaha, nebraska, usa",0679425268,0,Sole Survivor,DEAN KOONTZ,1997,Knopf,http://images.amazon.com/images/P/0679425268.0...,http://images.amazon.com/images/P/0679425268.0...,http://images.amazon.com/images/P/0679425268.0...
1029598,278418,"omaha, nebraska, usa",0679448594,0,Primary Colors: A Novel of Politics,Anonymous,1996,Random House Inc,http://images.amazon.com/images/P/0679448594.0...,http://images.amazon.com/images/P/0679448594.0...,http://images.amazon.com/images/P/0679448594.0...
1029688,278418,"omaha, nebraska, usa",0688042171,0,If Tomorrow Comes,Sidney Sheldon,1985,William Morrow &amp; Company,http://images.amazon.com/images/P/0688042171.0...,http://images.amazon.com/images/P/0688042171.0...,http://images.amazon.com/images/P/0688042171.0...
1030032,278418,"omaha, nebraska, usa",0821759744,0,Dangerous,Becky Barker,1997,Kensington Publishing Corporation,http://images.amazon.com/images/P/0821759744.0...,http://images.amazon.com/images/P/0821759744.0...,http://images.amazon.com/images/P/0821759744.0...


No duplicates. Data is ready. 

Now we just need to build the table with books as rows, users as columns, and ratings as values.

In [256]:
# Book x user rating matrix: one row per title, one column per user.
# pivot_table aggregates with the mean by default, so several ratings by the
# same user for the same title collapse into their average; pairs with no
# rating become NaN.
pt = pd.pivot_table(
    final_rating,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
)

In [257]:
# Display the book x user matrix (NaN where a user has no rating for a title).
pt

User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,10.0,,,,,,0.0,,,
1st to Die: A Novel,,,,,,,,,,9.0,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,0.0,...,,,,,,0.0,,,0.0,
4 Blondes,,,,,,,,0.0,,,...,,,,,,,,,,
A Bend in the Road,0.0,,7.0,,,,,,,,...,,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,7.0,,,,,,0.0,...,,9.0,,,,,0.0,,,
You Belong To Me,,,,,,,,,0.0,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,0.0,,,...,,,,,,,0.0,,,
Zoya,,,,,,,,,,,...,,0.0,,,,,,,,


In [265]:
# Treat "user has no rating for this book" as 0 so the matrix is fully numeric.
pt = pt.fillna(0)

In [267]:
from sklearn.metrics.pairwise import cosine_similarity

In [268]:
# Pairwise cosine similarity between the rows of pt, i.e. between book titles
# based on their per-user rating vectors.
similarity_score = cosine_similarity(pt)

In [271]:
# One row and one column per book title in pt.
similarity_score.shape  ## 706*706 matrix is created for all the books. 

(706, 706)

In [319]:
def recommend(books, top_n=5):
    """Print the titles most similar to the given book.

    Similarity is read from the module-level ``similarity_score`` matrix, whose
    rows and columns follow the order of ``pt.index``.

    Parameters
    ----------
    books : str
        Title to look up; it must match an entry of ``pt.index`` exactly.
    top_n : int, default 5
        How many similar titles to print.

    Raises
    ------
    IndexError
        If ``books`` is not one of the titles in ``pt.index``.
    """
    matches = np.where(pt.index == books)[0]
    if len(matches) == 0:
        raise IndexError(f"{books!r} is not one of the titles in the pivot table")
    index = matches[0]

    # Exclude the queried book by position instead of assuming it sorts first:
    # a tie for the top score would otherwise let the book recommend itself.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_score[index])
        if position != index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    for position, _ in candidates[:top_n]:
        print(pt.index[position])

In [320]:
# Example: titles most similar to a Stephen King novel.
recommend('The Dark Half')

The Tommyknockers
Gerald's Game
Four Past Midnight
Skeleton Crew
Insomnia


In [322]:
# Second example, using another title from the pivot table.
recommend("You Belong To Me")

Loves Music, Loves to Dance
I'll Be Seeing You
Before I Say Good-Bye
Daddy's Little Girl
All Around the Town


# Done