# Book Recommender System

In [68]:
import numpy as np
import pandas as pd

In [69]:
# Load the books, ratings and users tables from their CSV files under data/.
csv_paths = ('data/Books.csv', 'data/Ratings.csv', 'data/Users.csv')
books_df, ratings_df, users_df = map(pd.read_csv, csv_paths)

  books_df = pd.read_csv('data/Books.csv')


In [70]:
# books_df.head()

In [71]:
# users_df.head()

In [72]:
# ratings_df.head()

## Popularity Based Recommender
### Top 50 Books

In [73]:
# Attach book metadata to every rating row; rows are matched on the shared 'ISBN' column.
ratings_n_books_df = pd.merge(ratings_df, books_df, on='ISBN')

In [74]:
# Number of rating rows per title (non-null 'Book-Rating' values).
# The column is selected before aggregating so that only it is counted,
# instead of counting every merged column and discarding all but one.
num_ratings_df = (
    ratings_n_books_df
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index()
    .rename(columns={'Book-Rating': 'num_ratings'})
)
# num_ratings_df

In [75]:
# Mean rating per title.
# 'Book-Rating' is selected *before* aggregating: calling .mean() on the whole
# grouped frame also tries to average non-numeric columns such as 'Book-Author'
# and 'Image-URL-L', which raises TypeError on pandas >= 2.0.
avg_ratings_df = (
    ratings_n_books_df
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'avg_ratings'})
)
# avg_ratings_df

In [76]:
# One row per title holding both its rating count and its average rating.
old_popularity_df = pd.merge(num_ratings_df, avg_ratings_df, on='Book-Title')

# Top 50: titles with at least 250 ratings, ranked by average rating, then
# joined to the book metadata. drop_duplicates keeps the first books_df match
# for each title.
popularity_df = (
    old_popularity_df
    .loc[lambda stats: stats['num_ratings'] >= 250]
    .sort_values('avg_ratings', ascending=False)
    .head(50)
    .merge(books_df, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    [['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Image-URL-L', 'num_ratings', 'avg_ratings']]
)
popularity_df

Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Image-URL-L,num_ratings,avg_ratings
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
3,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
5,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,http://images.amazon.com/images/P/0590353403.0...,278,5.73741
9,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
13,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
16,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339681.0...,281,5.007117
17,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
26,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
28,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,1986,http://images.amazon.com/images/P/0345339711.0...,260,4.880769
39,To Kill a Mockingbird,Harper Lee,1988,http://images.amazon.com/images/P/0446310786.0...,510,4.7


## Collaborative Filtering Based Recommender (KNN)
### Matching Books

In [77]:
# Step 1: keep only users with more than 200 rating rows in the merged frame.
# Only 'Book-Rating' is counted; counting every column and then picking one
# gives the same numbers with more work.
imp_users = (
    ratings_n_books_df
    .groupby('User-ID')['Book-Rating']
    .count()
    .loc[lambda counts: counts > 200]
    .index
)
heavy_user_ratings = ratings_n_books_df[ratings_n_books_df['User-ID'].isin(imp_users)]

# Step 2: among those users' ratings, keep only titles rated at least 50 times.
imp_books = (
    heavy_user_ratings
    .groupby('Book-Title')['Book-Rating']
    .count()
    .loc[lambda counts: counts >= 50]
    .index
)
filtered_ratings = heavy_user_ratings[heavy_user_ratings['Book-Title'].isin(imp_books)]
# filtered_ratings

In [78]:
# Title x user rating matrix: one row per book title, one column per user.
# Cells with no rating come out of pivot_table as NaN and are replaced by 0.
books_pt = filtered_ratings.pivot_table(
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
).fillna(0)
# books_pt

In [79]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [80]:
# Sparse (CSR) copy of the title x user matrix.
ratings_matrix = csr_matrix(books_pt.to_numpy())

# Brute-force nearest-neighbour search over the book rows using cosine distance.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(ratings_matrix)

In [99]:
# Book metadata plus rating statistics for every title in old_popularity_df
# (no rating-count threshold and no top-50 cut), one row per title.
books_n_popularity_df = (
    pd.merge(old_popularity_df, books_df, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    [['Book-Title', 'Book-Author', 'Year-Of-Publication', 'Image-URL-L', 'num_ratings', 'avg_ratings']]
)

In [100]:
def recommend(book_name, n_recommendations=10):
    """Return metadata for the books whose rating vectors are closest to ``book_name``.

    The query title's row of ``books_pt`` is passed to ``model_knn.kneighbors``
    and every neighbouring title is looked up in ``books_n_popularity_df``.

    Args:
        book_name: Title to look up; it must match an entry of
            ``books_pt.index`` exactly.
        n_recommendations: Maximum number of neighbours to return. The default
            of 10 matches the previously hard-coded ``n_neighbors=11``.

    Returns:
        A list with one inner list per neighbour, in the order returned by
        ``kneighbors``, laid out as ``[title, author, image URL,
        year of publication, avg_ratings, num_ratings]``. A neighbour with no
        row in ``books_n_popularity_df`` contributes an empty inner list.

    Raises:
        IndexError: If ``book_name`` is not present in ``books_pt.index``.
    """
    matches = np.where(books_pt.index == book_name)[0]
    if len(matches) == 0:
        # Same exception type as the old bare ``[0][0]`` lookup, but with a
        # message that says what went wrong.
        raise IndexError(f"{book_name!r} is not among the titles in books_pt")
    query_index = matches[0]

    if n_recommendations <= 0:
        return []

    # Ask for one extra neighbour because the query book is normally returned
    # as its own nearest neighbour; never ask for more rows than books_pt has.
    n_neighbors = min(n_recommendations + 1, len(books_pt))
    query_vector = books_pt.iloc[query_index, :].values.reshape(1, -1)
    _, indices = model_knn.kneighbors(query_vector, n_neighbors=n_neighbors)

    # Remove the query by index, not by position: when another book is at the
    # same distance the query is not guaranteed to come back first.
    neighbour_indices = [
        idx for idx in indices.flatten() if idx != query_index
    ][:n_recommendations]

    # Output order of the fields inside each inner list.
    fields = ['Book-Title', 'Book-Author', 'Image-URL-L',
              'Year-Of-Publication', 'avg_ratings', 'num_ratings']

    data = []
    for idx in neighbour_indices:
        title = books_pt.index[idx]
        # books_n_popularity_df is already de-duplicated on 'Book-Title', so
        # this selects at most one row.
        temp_df = books_n_popularity_df[books_n_popularity_df['Book-Title'] == title]
        item = []
        for field in fields:
            item.extend(temp_df[field].values)
        data.append(item)
    return data

In [101]:
# Example query; the title must match an entry of books_pt.index exactly.
recommend('1984')

[['Animal Farm',
  'George Orwell',
  'http://images.amazon.com/images/P/0451526341.01.LZZZZZZZ.jpg',
  2004,
  4.274678111587983,
  233],
 ["The Handmaid's Tale",
  'Margaret Atwood',
  'http://images.amazon.com/images/P/0449212602.01.LZZZZZZZ.jpg',
  1989,
  3.3987138263665595,
  311],
 ['Brave New World',
  'Aldous Huxley',
  'http://images.amazon.com/images/P/0060809833.01.LZZZZZZZ.jpg',
  1989,
  4.331858407079646,
  226],
 ['The Vampire Lestat (Vampire Chronicles, Book II)',
  'ANNE RICE',
  'http://images.amazon.com/images/P/0345313860.01.LZZZZZZZ.jpg',
  1986,
  3.777408637873754,
  301],
 ['The Hours : A Novel',
  'Michael Cunningham',
  'http://images.amazon.com/images/P/0312243022.01.LZZZZZZZ.jpg',
  2000,
  3.4444444444444446,
  216],
 ['Fahrenheit 451',
  'Ray Bradbury',
  'http://images.amazon.com/images/P/3257208626.01.LZZZZZZZ.jpg',
  1994,
  4.264058679706602,
  409],
 ['The Catcher in the Rye',
  'J.D. Salinger',
  'http://images.amazon.com/images/P/0316769487.01.LZZZ

In [102]:
import pickle
# pickle.dump(popularity_df, open('popularity.pkl', 'wb'))
# pickle.dump(books_pt, open('books_pt.pkl', 'wb'))
# pickle.dump(books_n_popularity_df, open('books_n_popularity_df.pkl', 'wb'))
# pickle.dump(model_knn, open('model_knn.pkl', 'wb'))