# Book Recommender System 

### 1.Data loading

In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [66]:
books = pd.read_csv(
    'books_data/BX-Books.csv',
    sep=";",
    on_bad_lines='skip', 
    encoding='latin-1'
)

  books = pd.read_csv(


In [67]:
books.head(3)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...


In [68]:
books.shape

(271360, 8)

### 2.Data Cleaning & Column Selection - BX-Books.csv

In [69]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [70]:
books = books[[
    'ISBN', 'Book-Title' , 'Book-Author', 'Year-Of-Publication'
   ,'Publisher' , 'Image-URL-L'
]]

In [71]:
books.head(3)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...


In [72]:
books.rename(columns={
    "Book-Title":"Title" , "Book-Author":"Author"
   ,"Book-Author":"Author", "Year-Of-Publication":"Year"
   ,"Image-URL-L":"Image_URL"
} , inplace=True )

In [73]:
books.head(3)

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Image_URL
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...


##### Data BX-Users & BX-Book-Ratings

In [74]:
users = pd.read_csv(
    'books_data/BX-Users.csv',
    sep=";",
    on_bad_lines='skip', 
    encoding='latin-1'
)

In [75]:
users.head(3)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",


In [76]:
users.shape

(278858, 3)

In [77]:
ratings = pd.read_csv(
    'books_data/BX-Book-Ratings.csv',
    sep=";",
    on_bad_lines='skip', 
    encoding='latin-1'
)

In [78]:
ratings.head(3)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0


In [79]:
ratings.rename(columns={
"Book-Rating":"Rating"
} , inplace=True )

In [80]:
ratings.head(5)

Unnamed: 0,User-ID,ISBN,Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [81]:
ratings.shape

(1149780, 3)

In [82]:
print(ratings.shape)
print(users.shape)
print(books.shape)

(1149780, 3)
(278858, 3)
(271360, 6)


### 2.Book Recommendation Data Prep

In [83]:
min_ratings_threshold = 200
is_active_user = ratings['User-ID'].value_counts() > min_ratings_threshold

In [84]:
is_active_user[is_active_user].shape # Filter active users (count > 200) and get the shape (count of active users)

(899,)

In [85]:
active_user_ids = is_active_user[is_active_user].index
active_user_ids


Index([ 11676, 198711, 153662,  98391,  35859, 212898, 278418,  76352, 110973,
       235105,
       ...
       116122,  44296,  28634,  59727,  73681, 274808, 188951,   9856, 155916,
       268622],
      dtype='int64', name='User-ID', length=899)

In [86]:
# Keep only ratings from power users
ratings = ratings[ratings['User-ID'].isin(active_user_ids)]

In [87]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8
1459,277427,0030615321,0
1460,277427,0060002050,0


In [88]:
ratings.shape

(526356, 3)

In [89]:
books.head(2)

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Image_URL
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...


In [90]:
ratings_with_books = ratings.merge(books,on="ISBN")

In [91]:
ratings_with_books.head()

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher,Image_URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,http://images.amazon.com/images/P/0026217457.0...
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning,http://images.amazon.com/images/P/003008685X.0...
3,277427,0030615321,0,"Ask for May, Settle for June (A Doonesbury book)",G. B. Trudeau,1982,Henry Holt &amp; Co,http://images.amazon.com/images/P/0030615321.0...
4,277427,0060002050,0,On a Wicked Dawn (Cynster Novels),Stephanie Laurens,2002,Avon Books,http://images.amazon.com/images/P/0060002050.0...


In [92]:
ratings_with_books.shape

(487671, 8)

In [93]:
# Count number of ratings per book
num_rating = ratings_with_books.groupby('Title')['Rating'].count().reset_index()
num_rating.head()

Unnamed: 0,Title,Rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [94]:
num_rating.rename(columns={"Rating":"num_of_Rating"}, inplace=True)
num_rating.head()

Unnamed: 0,Title,num_of_Rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [95]:
ratings_with_books.head()

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher,Image_URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,http://images.amazon.com/images/P/0026217457.0...
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning,http://images.amazon.com/images/P/003008685X.0...
3,277427,0030615321,0,"Ask for May, Settle for June (A Doonesbury book)",G. B. Trudeau,1982,Henry Holt &amp; Co,http://images.amazon.com/images/P/0030615321.0...
4,277427,0060002050,0,On a Wicked Dawn (Cynster Novels),Stephanie Laurens,2002,Avon Books,http://images.amazon.com/images/P/0060002050.0...


In [96]:
final_rating = ratings_with_books.merge(num_rating , on='Title')

In [97]:
final_rating.head(2)

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher,Image_URL,num_of_Rating
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,http://images.amazon.com/images/P/0026217457.0...,7


In [98]:
# Keep only books with at least 50 ratings
final_rating = final_rating[final_rating['num_of_Rating']>=50]

In [99]:
final_rating.sample()

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher,Image_URL,num_of_Rating
199257,113904,440226430,7,Summer Sisters,Judy Blume,1999,Dell Publishing Company,http://images.amazon.com/images/P/0440226430.0...,188


In [100]:
final_rating.shape

(61853, 9)