# Online Book Recommendation System using item-item collaborative filtering

In [1]:
# importing necessary libraries
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

## Datasets file and dropping the unnecessary columns from the dataset

In [2]:
# reading the dataset files
book_ratings = pd.read_csv('ratings.csv', encoding = 'latin-1')
books = pd.read_csv('books.csv', encoding = 'latin-1')

# dropping the unnecessary columns from ratings csv file 
book_ratings = pd.merge(books,book_ratings).drop(['book_id','best_book_id','work_id','books_count','isbn13','original_publication_year','work_text_reviews_count','title','ratings_1','ratings_2','ratings_3','ratings_4','ratings_5','image_url','language_code','average_rating','ratings_count','work_ratings_count','small_image_url'],axis=1)

# head() function is used to test whether the we have right data 
book_ratings.head()

Unnamed: 0,id,isbn,authors,original_title,user_id,rating
0,2,439554934,"J.K. Rowling, Mary GrandPrÃ©",Harry Potter and the Philosopher's Stone,314,3
1,2,439554934,"J.K. Rowling, Mary GrandPrÃ©",Harry Potter and the Philosopher's Stone,588,1
2,2,439554934,"J.K. Rowling, Mary GrandPrÃ©",Harry Potter and the Philosopher's Stone,2077,2
3,2,439554934,"J.K. Rowling, Mary GrandPrÃ©",Harry Potter and the Philosopher's Stone,2487,3
4,2,439554934,"J.K. Rowling, Mary GrandPrÃ©",Harry Potter and the Philosopher's Stone,2900,3


In [3]:
# getting recommendation based on the No.of user Ratings and preparing data table for analysis
user_ratings = book_ratings.pivot_table(index=['user_id'],columns=['original_title'],values='rating')
user_ratings.head()

original_title,Unnamed: 1_level_0,Animal Farm & 1984,Carter Beats the Devil,The Fellowship of the Ring,"ãããããªãã¨ãç ãå¥³ [Mekurayanagi to, nemuru onna]",'Salem's Lot The Illustrated Edition,'Tis: A Memoir,1421: The Year China Discovered America,1776,31 Songs,...,á¼¸Î»Î¹Î¬Ï ; á½Î´ÏÏÏÎµÎ¹Î±,á¼¸ÏÏÎ¿ÏÎ¯Î±Î¹,á½Î´ÏÏÏÎµÎ¹Î±,á½ÏÎ­ÏÏÎµÎ¹Î±,â¦trotzdem Ja zum Leben sagen: Ein Psychologe erlebt das Konzentrationslager,ã¹ãã¼ããã¯ã®æäºº [SupÅ«toniku no Koibito],æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],ç ã®å¥³ [Suna no onna],è±¡ã®æ¶æ» [ZÅ no shÅmetsu],é¼ã®é¬éè¡å¸« 1
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,5.0,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [4]:
## removing the books which have less than 10 users who have rated it and filling remaining NaN with 0 in the data table.
user_ratings = user_ratings.dropna(thresh=10,axis=1).fillna(0)
user_ratings.head()

original_title,Unnamed: 1_level_0,Animal Farm & 1984,Carter Beats the Devil,The Fellowship of the Ring,"ãããããªãã¨ãç ãå¥³ [Mekurayanagi to, nemuru onna]",'Salem's Lot The Illustrated Edition,'Tis: A Memoir,1421: The Year China Discovered America,1776,31 Songs,...,á¼¸Î»Î¹Î¬Ï ; á½Î´ÏÏÏÎµÎ¹Î±,á¼¸ÏÏÎ¿ÏÎ¯Î±Î¹,á½Î´ÏÏÏÎµÎ¹Î±,á½ÏÎ­ÏÏÎµÎ¹Î±,â¦trotzdem Ja zum Leben sagen: Ein Psychologe erlebt das Konzentrationslager,ã¹ãã¼ããã¯ã®æäºº [SupÅ«toniku no Koibito],æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],ç ã®å¥³ [Suna no onna],è±¡ã®æ¶æ» [ZÅ no shÅmetsu],é¼ã®é¬éè¡å¸« 1
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Standardizing
#### - process of reducing the mean of all ratings given by the user to zero and dividing it by the range.

In [5]:
## standardizing the vaalues
def standardize(row):
    new_row = (row - row.mean())/(row.max()-row.min())
    return new_row

ratings_standardize = user_ratings.apply(standardize)
ratings_standardize.head()

original_title,Unnamed: 1_level_0,Animal Farm & 1984,Carter Beats the Devil,The Fellowship of the Ring,"ãããããªãã¨ãç ãå¥³ [Mekurayanagi to, nemuru onna]",'Salem's Lot The Illustrated Edition,'Tis: A Memoir,1421: The Year China Discovered America,1776,31 Songs,...,á¼¸Î»Î¹Î¬Ï ; á½Î´ÏÏÏÎµÎ¹Î±,á¼¸ÏÏÎ¿ÏÎ¯Î±Î¹,á½Î´ÏÏÏÎµÎ¹Î±,á½ÏÎ­ÏÏÎµÎ¹Î±,â¦trotzdem Ja zum Leben sagen: Ein Psychologe erlebt das Konzentrationslager,ã¹ãã¼ããã¯ã®æäºº [SupÅ«toniku no Koibito],æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],ç ã®å¥³ [Suna no onna],è±¡ã®æ¶æ» [ZÅ no shÅmetsu],é¼ã®é¬éè¡å¸« 1
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,-0.002416,-0.002634,-0.003026,-0.001933,-0.00249,-0.002038,-0.002746,-0.002564,-0.002753,-0.002893,...,-0.002339,-0.002501,-0.002613,-0.002872,-0.002851,-0.0029,-0.003026,-0.002879,-0.002304,-0.003068
3,-0.002416,-0.002634,-0.003026,-0.001933,-0.00249,-0.002038,-0.002746,-0.002564,-0.002753,-0.002893,...,-0.002339,-0.002501,-0.002613,-0.002872,-0.002851,-0.0029,-0.003026,-0.002879,-0.002304,-0.003068
4,-0.002416,-0.002634,-0.003026,-0.001933,-0.00249,-0.002038,-0.002746,-0.002564,-0.002753,-0.002893,...,-0.002339,-0.002501,-0.002613,-0.002872,-0.002851,-0.0029,-0.003026,-0.002879,-0.002304,-0.003068
7,-0.002416,-0.002634,-0.003026,-0.001933,-0.00249,-0.002038,-0.002746,-0.002564,-0.002753,-0.002893,...,-0.002339,-0.002501,-0.002613,0.997128,-0.002851,-0.0029,-0.003026,-0.002879,-0.002304,-0.003068
9,-0.002416,-0.002634,-0.003026,-0.001933,-0.00249,-0.002038,-0.002746,-0.002564,-0.002753,-0.002893,...,-0.002339,-0.002501,-0.002613,-0.002872,-0.002851,-0.0029,-0.003026,-0.002879,-0.002304,-0.003068


## Using Cosine Similarity

In [6]:
## transpose similarity between items since we are doing item-item collaborative filtering
item_ratings = cosine_similarity(ratings_standardize.T)
print(item_ratings)

[[ 1.         -0.0032264  -0.00328152 ... -0.00320012 -0.00306772
   0.00912419]
 [-0.0032264   1.         -0.00335483 ... -0.00327161 -0.00313625
   0.00362681]
 [-0.00328152 -0.00335483  1.         ... -0.0033275  -0.00318983
  -0.00341366]
 ...
 [-0.00320012 -0.00327161 -0.0033275  ...  1.         -0.00311071
  -0.00332899]
 [-0.00306772 -0.00313625 -0.00318983 ... -0.00311071  1.
  -0.00319125]
 [ 0.00912419  0.00362681 -0.00341366 ... -0.00332899 -0.00319125
   1.        ]]


In [41]:
## calculating cosine cofficient in DataFrame
book_data_frame = pd.DataFrame(item_ratings,index=user_ratings.columns, columns=user_ratings.columns)
book_data_frame

original_title,Unnamed: 1_level_0,Animal Farm & 1984,Carter Beats the Devil,The Fellowship of the Ring,"ãããããªãã¨ãç ãå¥³ [Mekurayanagi to, nemuru onna]",'Salem's Lot The Illustrated Edition,'Tis: A Memoir,1421: The Year China Discovered America,1776,31 Songs,...,á¼¸Î»Î¹Î¬Ï ; á½Î´ÏÏÏÎµÎ¹Î±,á¼¸ÏÏÎ¿ÏÎ¯Î±Î¹,á½Î´ÏÏÏÎµÎ¹Î±,á½ÏÎ­ÏÏÎµÎ¹Î±,â¦trotzdem Ja zum Leben sagen: Ein Psychologe erlebt das Konzentrationslager,ã¹ãã¼ããã¯ã®æäºº [SupÅ«toniku no Koibito],æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],ç ã®å¥³ [Suna no onna],è±¡ã®æ¶æ» [ZÅ no shÅmetsu],é¼ã®é¬éè¡å¸« 1
original_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
,1.000000,-0.003226,-0.003282,-0.002929,-0.003190,-0.002741,-0.003237,-0.003216,-0.003253,-0.003235,...,-0.003205,0.010106,-0.003191,-0.003251,-0.003246,-0.003237,-0.003265,-0.003200,-0.003068,0.009124
Animal Farm & 1984,-0.003226,1.000000,-0.003355,-0.002995,-0.003261,-0.002802,0.006308,-0.003288,-0.003325,-0.003308,...,0.005664,-0.003256,-0.003262,-0.003323,-0.003319,-0.003309,0.003703,-0.003272,-0.003136,0.003627
Carter Beats the Devil,-0.003282,-0.003355,1.000000,0.003519,0.006550,-0.002850,-0.003366,-0.003344,-0.003382,-0.003364,...,-0.003333,-0.003312,0.016673,-0.003380,0.005396,-0.003366,0.019977,-0.003328,-0.003190,-0.003414
The Fellowship of the Ring,-0.002929,-0.002995,0.003519,1.000000,-0.002961,-0.002544,-0.003005,0.018109,0.035126,0.013546,...,0.039413,0.011607,0.021682,0.006880,-0.003013,-0.003004,-0.003030,-0.002970,-0.002847,0.011347
"ãããããªãã¨ãç ãå¥³ [Mekurayanagi to, nemuru onna]",-0.003190,-0.003261,0.006550,-0.002961,1.000000,-0.002770,-0.003272,-0.003251,-0.003287,-0.003270,...,0.013121,-0.003219,-0.003225,-0.003286,-0.003281,-0.003272,-0.003300,-0.003234,-0.003101,0.006418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ã¹ãã¼ããã¯ã®æäºº [SupÅ«toniku no Koibito],-0.003237,-0.003309,-0.003366,-0.003004,-0.003272,-0.002811,0.003690,-0.003299,-0.003336,0.137994,...,0.004859,-0.003267,-0.003273,-0.003334,-0.003330,1.000000,-0.003348,-0.003282,-0.003146,-0.003367
æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],-0.003265,0.003703,0.019977,-0.003030,-0.003300,-0.002835,-0.003349,-0.003327,-0.003364,-0.003347,...,-0.003316,-0.003295,-0.003301,-0.003363,0.007549,-0.003348,1.000000,-0.003310,-0.003173,-0.003396
ç ã®å¥³ [Suna no onna],-0.003200,-0.003272,-0.003328,-0.002970,-0.003234,0.010491,-0.003283,-0.003261,-0.003298,-0.003281,...,-0.003250,0.004313,-0.003236,0.007875,-0.003292,-0.003282,-0.003310,1.000000,-0.003111,-0.003329
è±¡ã®æ¶æ» [ZÅ no shÅmetsu],-0.003068,-0.003136,-0.003190,-0.002847,-0.003101,-0.002664,-0.003147,-0.003126,-0.003162,-0.003145,...,-0.003116,-0.003096,-0.003102,-0.003160,-0.003156,-0.003146,-0.003173,-0.003111,1.000000,-0.003191


## Recommending similar books

In [42]:
## getting most similar book recommendation
## funtion to get most similar boooks
def get_similar_books(book_name,user_rating):
    similar_book = book_data_frame[book_name]*(user_rating-2.5)
    # sorting them
    similar_book = similar_book.sort_values(ascending=False)
    
    ## returning similar books
    return similar_book


In [54]:
## rating the books
book_reader = [("A Man Without a Country",3),
               ("Angels & Demons ",3),
               ("A Great and Terrible Beauty",2),
               ("A Lesson Before Dying",4)]

similar_books = pd.DataFrame()

for book,rating in book_reader:
    similar_books = similar_books.append(get_similar_books(book,rating),ignore_index=True)
    
similar_books.head()
recommend_books = pd.DataFrame(similar_books.sum().sort_values(ascending=False))
recommend_books.head(15)

Unnamed: 0,0
A Lesson Before Dying,1.498375
A Man Without a Country,0.502719
Angels & Demons,0.502701
Lunar Park,0.314588
á¼¸ÏÏÎ¿ÏÎ¯Î±Î¹,0.306232
æµ·è¾ºã®ã«ãã« [Umibe no Kafuka],0.262166
From the Mixed-Up Files of Mrs. Basil E. Frankweiler,0.239737
In Our Time,0.212922
Open House,0.199227
His Excellency: George Washington,0.181749
