In [12]:
"""
Uses cosine similarity.
Filters only books where at least 100 reviews are present.
Only where reviewers have given at least 200 reviews.
"""


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



In [13]:
books_df = pd.read_csv("../data/Books.csv")
users_df = pd.read_csv("../data/Users.csv")
ratings_df = pd.read_csv("../data/ratings.csv")

  books_df = pd.read_csv("../data/Books.csv")


In [14]:
ratings_df.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [15]:
# Clean up
# 1. Find the user ID of those who have rater either too few books (<5) and too many books (>200)
# 2. Remove those users from the ratings_df

valid_transaction_df = ratings_df.groupby('User-ID').filter(lambda x: len(x) > 5 and len(x) < 200)
valid_transaction_df.groupby('User-ID').size().sort_values(ascending=False)

User-ID
240403    199
203017    199
193458    199
2033      198
267061    198
         ... 
95420       6
95156       6
231313      6
95146       6
233397      6
Length: 18812, dtype: int64

In [16]:
ratings_with_book_titles = ratings_df.merge(books_df,on='ISBN')
ratings_with_book_titles.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
3,8680,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
4,10314,034545104X,9,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...


In [17]:
ratings_with_book_titles.drop(columns=["ISBN","Image-URL-S","Image-URL-M"],axis=1,inplace=True)

In [18]:
complete_df = ratings_with_book_titles.merge(users_df.drop("Age", axis=1), on="User-ID")
complete_df.head()

Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,Location
0,276725,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,"tyler, texas, usa"
1,2313,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,"cincinnati, ohio, usa"
2,2313,9,Ender's Game (Ender Wiggins Saga (Paperback)),Orson Scott Card,1986,Tor Books,http://images.amazon.com/images/P/0812533550.0...,"cincinnati, ohio, usa"
3,2313,8,In Cold Blood (Vintage International),TRUMAN CAPOTE,1994,Vintage,http://images.amazon.com/images/P/0679745580.0...,"cincinnati, ohio, usa"
4,2313,9,Divine Secrets of the Ya-Ya Sisterhood : A Novel,Rebecca Wells,1996,HarperCollins,http://images.amazon.com/images/P/0060173289.0...,"cincinnati, ohio, usa"


In [19]:
complete_df['Location'] = complete_df['Location'].str.split(',').str[-1].str.strip()
complete_df.head()

Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,Location
0,276725,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,usa
1,2313,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,usa
2,2313,9,Ender's Game (Ender Wiggins Saga (Paperback)),Orson Scott Card,1986,Tor Books,http://images.amazon.com/images/P/0812533550.0...,usa
3,2313,8,In Cold Blood (Vintage International),TRUMAN CAPOTE,1994,Vintage,http://images.amazon.com/images/P/0679745580.0...,usa
4,2313,9,Divine Secrets of the Ya-Ya Sisterhood : A Novel,Rebecca Wells,1996,HarperCollins,http://images.amazon.com/images/P/0060173289.0...,usa


In [20]:
min_user_reviewers = 200
user_ratings = complete_df.groupby('User-ID').count()
filtered_users = user_ratings[user_ratings > min_user_reviewers].index
complete_df = complete_df[complete_df['User-ID'].isin(filtered_users)]


In [21]:
min_ratings_count_threshold=100
rating_counts= complete_df.groupby('Book-Title').count()['Book-Rating']
popular_books = rating_counts[rating_counts >= min_ratings_count_threshold].index

In [22]:
final_ratings =  complete_df[complete_df['Book-Title'].isin(popular_books)]
print(f"Number of ratings: {len(final_ratings)}")
final_ratings.head()

Number of ratings: 183799


Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,Location
2,2313,9,Ender's Game (Ender Wiggins Saga (Paperback)),Orson Scott Card,1986,Tor Books,http://images.amazon.com/images/P/0812533550.0...,usa
4,2313,9,Divine Secrets of the Ya-Ya Sisterhood : A Novel,Rebecca Wells,1996,HarperCollins,http://images.amazon.com/images/P/0060173289.0...,usa
6,2313,5,The Bonesetter's Daughter,Amy Tan,2001,Putnam Publishing Group,http://images.amazon.com/images/P/0399146431.0...,usa
7,2313,9,The Princess Bride: S Morgenstern's Classic Ta...,WILLIAM GOLDMAN,1987,Del Rey,http://images.amazon.com/images/P/0345348036.0...,usa
9,2313,0,The Sparrow,MARY DORIA RUSSELL,1997,Fawcett Books,http://images.amazon.com/images/P/0449912558.0...,usa


In [23]:
pt = final_ratings.pivot_table(index='Book-Title',columns='User-ID'
                          ,values='Book-Rating')
pt.head()

User-ID,8,9,14,16,17,26,32,39,42,44,...,278819,278820,278824,278828,278832,278836,278843,278844,278846,278854
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,,,,,,,,,,,...,,,,,,,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
24 Hours,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,,,,,,,,,,...,,,,,,,,,,
4 Blondes,,,,,,,,,,,...,,,,,,,,,,


In [24]:
pt.fillna(0,inplace=True)
pt

User-ID,8,9,14,16,17,26,32,39,42,44,...,278819,278820,278824,278828,278832,278836,278843,278844,278846,278854
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24 Hours,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
from sklearn.metrics.pairwise import cosine_similarity 

similarity_score = cosine_similarity(pt)


def recommend(book_name):
    index = np.where(pt.index==book_name)[0][0]
    similar_books = sorted(list(enumerate(similarity_score[index])),key=lambda x:x[1], reverse=True)[1:6]
    
    data = []
    
    for i in similar_books:
        item = []
        temp_df = books_df[books_df['Book-Title'] == pt.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Image-URL-M'].values))
        
        data.append(item)
    return data

def print_available_books():
    return list(final_ratings['Book-Title'].unique())

def search_name_like(name):
    return list(final_ratings[final_ratings['Book-Title'].str.contains(name)]['Book-Title'].unique())

def search_and_recommend(name):
    search_results = search_name_like(name)
    if len(search_results) == 0:
        return "No books found"
    return search_results, recommend(search_results[0])

In [26]:
search_name_like("Heaven")

['The Five People You Meet in Heaven',
 'Pigs in Heaven',
 'Heaven and Earth (Three Sisters Island Trilogy)',
 'One Door Away from Heaven',
 'Cry to Heaven']

In [27]:
recommend("One Door Away from Heaven")

[['From a Buick 8 : A Novel',
  'Stephen King',
  'http://images.amazon.com/images/P/0743211375.01.MZZZZZZZ.jpg'],
 ['By the Light of the Moon',
  'DEAN KOONTZ',
  'http://images.amazon.com/images/P/0553582763.01.MZZZZZZZ.jpg'],
 ['Mr. Murder',
  'Dean R. Koontz',
  'http://images.amazon.com/images/P/0425144429.01.MZZZZZZZ.jpg'],
 ['The Key to Midnight',
  'Dean R. Koontz',
  'http://images.amazon.com/images/P/0425147517.01.MZZZZZZZ.jpg'],
 ['Dark Rivers of the Heart',
  'Dean R. Koontz',
  'http://images.amazon.com/images/P/034539657X.01.MZZZZZZZ.jpg']]

In [28]:
recommend('Harry Potter and the Chamber of Secrets (Book 2)')

[['Harry Potter and the Prisoner of Azkaban (Book 3)',
  'J. K. Rowling',
  'http://images.amazon.com/images/P/0439136350.01.MZZZZZZZ.jpg'],
 ['Harry Potter and the Goblet of Fire (Book 4)',
  'J. K. Rowling',
  'http://images.amazon.com/images/P/0439139597.01.MZZZZZZZ.jpg'],
 ["Harry Potter and the Sorcerer's Stone (Book 1)",
  'J. K. Rowling',
  'http://images.amazon.com/images/P/0590353403.01.MZZZZZZZ.jpg'],
 ["Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))",
  'J. K. Rowling',
  'http://images.amazon.com/images/P/059035342X.01.MZZZZZZZ.jpg'],
 ['Harry Potter and the Order of the Phoenix (Book 5)',
  'J. K. Rowling',
  'http://images.amazon.com/images/P/043935806X.01.MZZZZZZZ.jpg']]

In [30]:
search_and_recommend('Dune')

(['Dune (Remembering Tomorrow)'],
 [['The Mists of Avalon',
   'MARION ZIMMER BRADLEY',
   'http://images.amazon.com/images/P/0345350499.01.MZZZZZZZ.jpg'],
  ['Silence of the Lambs',
   'Thomas Harris',
   'http://images.amazon.com/images/P/0312924585.01.MZZZZZZZ.jpg'],
  ['Skeleton Crew',
   'Stephen King',
   'http://images.amazon.com/images/P/0451168615.01.MZZZZZZZ.jpg'],
  ["Ender's Game (Ender Wiggins Saga (Paperback))",
   'Orson Scott Card',
   'http://images.amazon.com/images/P/0312853238.01.MZZZZZZZ.jpg'],
  ["Dirk Gently's Holistic Detective Agency",
   'Douglas Adams',
   'http://images.amazon.com/images/P/0671660632.01.MZZZZZZZ.jpg']])

In [34]:
search_and_recommend('Mystery')

(['E Is for Evidence: A Kinsey Millhone Mystery (Kinsey Millhone Mysteries (Paperback))'],
 [['D Is for Deadbeat (Kinsey Millhone Mysteries (Paperback))',
   'Sue Grafton',
   'http://images.amazon.com/images/P/0553271636.01.MZZZZZZZ.jpg'],
  ['F Is for Fugitive (Kinsey Millhone Mysteries (Paperback))',
   'Sue Grafton',
   'http://images.amazon.com/images/P/0553284789.01.MZZZZZZZ.jpg'],
  ['I Is for Innocent',
   'Sue Grafton',
   'http://images.amazon.com/images/P/0449221512.01.MZZZZZZZ.jpg'],
  ['H Is for Homicide (Kinsey Millhone Mysteries (Paperback))',
   'Sue Grafton',
   'http://images.amazon.com/images/P/0449219461.01.MZZZZZZZ.jpg'],
  ['G Is for Gumshoe (Kinsey Millhone Mysteries (Paperback))',
   'Sue Grafton',
   'http://images.amazon.com/images/P/0449219364.01.MZZZZZZZ.jpg']])