In [24]:
import numpy as np
import pandas as pd
import warnings
pd.options.mode.chained_assignment = None

In [25]:
# Load the raw book dataset; the relative path is resolved against the
# kernel's current working directory.
books = pd.read_csv('Best_Books_Ever.csv')

In [26]:
# Keep only the columns used downstream. The explicit .copy() makes `books`
# an independent frame, so the column assignments in later cells write to it
# directly rather than to a slice of the frame returned by read_csv.
books = books[['title','author','rating','genres','coverImg', 'isbn', 'characters']].copy()

In [27]:
# Count missing values per column.
books.isna().sum()

Unnamed: 0,0
title,0
author,0
rating,0
genres,0
coverImg,605
isbn,0
characters,0


In [28]:
def convert_author(obj):
    """Drop the parenthesised suffix from every author name.

    Everything from the first ``(`` onwards is removed, so
    ``"Jane Doe (Illustrator)"`` becomes ``"Jane Doe "``. Whitespace in
    front of the parenthesis is kept as-is.

    Parameters
    ----------
    obj : iterable of str
        Author names for one book. ``None`` or a float (e.g. NaN for a
        missing value) is treated as "no authors".

    Returns
    -------
    list of str
        The names with any parenthesised suffix removed, in input order.
    """
    # A missing cell is not iterable; return an empty list instead of raising.
    if obj is None or isinstance(obj, float):
        return []
    # split('(')[0] returns the whole string when there is no '(' in it.
    return [name.split('(')[0] for name in obj]

In [29]:
# Turn the comma-separated author string into a list, strip parenthesised
# suffixes, then remove spaces so each author becomes a single token.
books['author'] = (
    books['author']
    .apply(lambda raw: raw.split(', ') if isinstance(raw, str) else raw)
    .apply(convert_author)
    .apply(lambda names: [name.replace(" ", "") for name in names])
)

In [30]:
def convert_characters(obj, limit=10):
    """Return at most ``limit`` character names without parenthesised suffixes.

    Everything from the first ``(`` onwards is removed from each name.

    Parameters
    ----------
    obj : iterable of str
        Character names for one book. ``None`` or a float (e.g. NaN for a
        missing value) is treated as "no characters".
    limit : int, default 10
        Maximum number of names to keep, counted from the start of ``obj``.

    Returns
    -------
    list of str
        The first ``limit`` cleaned names, in input order.
    """
    # A missing cell is not iterable; return an empty list instead of raising.
    if obj is None or isinstance(obj, float):
        return []
    names = []
    for name in obj:
        if len(names) >= limit:
            break
        # split('(')[0] returns the whole string when there is no '(' in it.
        names.append(name.split('(')[0])
    return names

In [31]:
import ast
# Both columns get the same treatment: parse the stored list literal into a
# real Python list, then remove spaces so each entry becomes a single token.
for list_column in ('genres', 'characters'):
    parsed = books[list_column].apply(
        lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
    )
    books[list_column] = parsed.apply(
        lambda items: [item.replace(" ", "") for item in items]
    )

In [32]:
# Cap each character list and cut parenthesised suffixes from the names.
books['characters'] = books['characters'].map(convert_characters)

In [33]:
# Each of the three columns holds a Python list per row, so `+` concatenates
# them row by row into one combined list of tags.
books['tags'] = books['author'] + books['genres'] + books['characters']

In [34]:
# Project the columns needed for recommendations. The .copy() makes
# `new_books` an independent frame, so the later `new_books['tags'] = ...`
# assignments write to it directly rather than to a slice of `books`.
new_books = books[['isbn', 'title', 'tags', 'rating', 'coverImg']].copy()

In [35]:
# Collapse each list of tags into one space-separated string.
new_books['tags'] = new_books['tags'].map(" ".join)

In [36]:
from nltk.stem.porter import PorterStemmer
# Shared stemmer instance; stemming() calls ps.stem on every token.
ps = PorterStemmer()

In [37]:
def stemming(text):
    """Stem every whitespace-separated token of ``text`` with ``ps``.

    Returns the stemmed tokens joined back together with single spaces.
    """
    return " ".join(ps.stem(token) for token in text.split())

In [38]:
# Replace every tag string with its stemmed form.
new_books['tags'] = new_books['tags'].map(stemming)

In [39]:
# Persist the processed frame; index=False leaves the row index out of the file.
new_books.to_csv("new_books.csv", index=False)

In [40]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer capped at 5000 features, with English stop words removed.
cv = CountVectorizer(max_features=5000, stop_words='english')

In [41]:
# Fit the vocabulary on the tag strings and convert the resulting count matrix
# to a dense NumPy array.
# NOTE(review): the dense copy is large at the shape reported in the next
# cell — consider keeping the matrix sparse if memory becomes a problem.
vectors = cv.fit_transform(new_books['tags']).toarray()

In [42]:
# (number of books, number of vocabulary features)
vectors.shape

(52478, 5000)

In [43]:
import sys
# Preview a sample of the learned vocabulary rather than printing all of it.
# Raising NumPy's print threshold is a process-wide setting that would make
# every later array repr in this kernel print in full as well.
cv.get_feature_names_out()[:50]

array(['12thcenturi', '13thcenturi', '14thcenturi', '15thcenturi',
       '16thcenturi', '17thcenturi', '18thcenturi', '19thcenturi',
       '1stdukeofclar', '1stdukeofsuffolk', '1stdukeofwellington',
       '1stgrade', '20thcenturi', '21stcenturi', '2ndgrade', '40k',
       'aaron', 'aaronb', 'aaronwarneranderson', 'abbi', 'abbiglin',
       'abdullahhussein', 'abdulmalikmujahid', 'abigailarcan',
       'abigailroux', 'abraham', 'abrahamlincoln', 'abuhamidal', 'abus',
       'academ', 'academia', 'accinni', 'ace', 'achil', 'action', 'activ',
       'adam', 'adamhauptman', 'adamjeffri', 'adamvas', 'addisonmoor',
       'adi', 'adolesc', 'adolfhitl', 'adopt', 'adrianalock',
       'adrianivashkov', 'adrianmol', 'adriennethompson', 'adult',
       'adultfict', 'adventur', 'aenea', 'africa', 'africanamerican',
       'africanamericanliteratur', 'africanamericanrom',
       'africanliteratur', 'agamemnon', 'agatha', 'agathachristi',
       'agricultur', 'ai', 'aidenst', 'aigl', 'aiken', 'a

In [44]:
pip install annoy



In [45]:
from annoy import AnnoyIndex
# The index dimension is the length of one row of `vectors`; 'angular' selects
# Annoy's angular distance metric.
annoy_index = AnnoyIndex(vectors.shape[1], 'angular')

In [46]:
# Register every row of `vectors` under its row number, so Annoy item ids
# line up with row positions.
for row_number, row_vector in enumerate(vectors):
    annoy_index.add_item(row_number, row_vector)

# Build the index with 10 trees (higher = better accuracy, more memory)
annoy_index.build(10)

True

In [47]:
# Write the built index to disk.
annoy_index.save('Book_Recommendation.ann')

True

In [48]:
def recommend(book, n_neighbors=15):
    """Print and return books similar to ``book``, best-rated first.

    Looks up the first row of ``new_books`` whose title equals ``book``,
    asks ``annoy_index`` for its ``n_neighbors`` nearest items, drops box
    sets and samplers, and orders what is left by ``rating`` (descending).
    The queried book is not filtered out, so it normally appears in the
    result as its own neighbour.

    Parameters
    ----------
    book : str
        Exact title to look up in ``new_books['title']``.
    n_neighbors : int, default 15
        Number of neighbours requested from the index before filtering.

    Returns
    -------
    list of int
        Row positions in ``new_books`` of the recommended books.

    Raises
    ------
    IndexError
        If no row of ``new_books`` has the given title.
    """
    excluded_terms = ("boxset", "box set", "sampler")

    # Annoy item ids are the row numbers used when the index was filled, so
    # work with row positions (iloc) throughout instead of index labels.
    matches = np.flatnonzero((new_books['title'] == book).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No book titled {book!r} found in new_books")
    book_index = int(matches[0])

    titles = new_books['title']
    ratings = new_books['rating']

    # Single pass over the neighbours, dropping any title that contains one
    # of the excluded terms (case-insensitive).
    similar_books = [
        i for i in annoy_index.get_nns_by_item(book_index, n_neighbors)
        if not any(term in titles.iloc[i].lower() for term in excluded_terms)
    ]
    similar_books = sorted(similar_books, key=lambda i: ratings.iloc[i], reverse=True)

    # Report the title that was actually queried, not the title of row 0.
    print(f"Recommendations for '{book}':")
    for idx in similar_books:
        print(f"- {titles.iloc[idx]}")
    return similar_books

In [49]:
# Example query; the returned list is kept for further inspection.
similar_books = recommend('The Hunger Games')

Recommendations for 'The Hunger Games':
- The Hunger Games
- Catching Fire
- Legend
- Mockingjay
- Independent Study
- Blood Red Road
- Once
- Graduation Day
- Sever
- Article 5
- The Last Princess
- Revealing Eden
