Note:

- Collaborative filters and content filters are the 2 most common types of recommenders out there.
- Collaborative filtering works at the user level: it takes an individual's statistics (ratings, which items they viewed, etc.) and draws similarities with other users based on these values, potentially suggesting items that one user has interacted with that another, similar user has not.
- Content filters focus more on the similarities between the items, such as weighted ratings, similarity of authors, frequency of topics appearing in the description, and so on rather than the users. This method requires a direct 'similarity score' between items in order to compute how related they are.
- Content filtering is best suited for this analysis, considering the nature of my dataset.



In [None]:
!pip install rake_nltk

Collecting rake_nltk
  Downloading https://files.pythonhosted.org/packages/8e/c4/b4ff57e541ac5624ad4b20b89c2bafd4e98f29fd83139f3a81858bdb3815/rake_nltk-1.0.4.tar.gz
Building wheels for collected packages: rake-nltk
  Building wheel for rake-nltk (setup.py) ... [?25l[?25hdone
  Created wheel for rake-nltk: filename=rake_nltk-1.0.4-py2.py3-none-any.whl size=7829 sha256=d9a74bf1ef7f4bef65be6db2b14c5a870c558b87cf7de3141bb624a1e163691e
  Stored in directory: /root/.cache/pip/wheels/ef/92/fc/271b3709e71a96ffe934b27818946b795ac6b9b8ff8682483f
Successfully built rake-nltk
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.4


In [None]:
import numpy as np
import pandas as pd
import re
import pickle
import random
import string
from rake_nltk import Rake
from nltk.tokenize import wordpunct_tokenize, RegexpTokenizer
from nltk.corpus import stopwords

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
from sklearn.metrics.pairwise import linear_kernel

import tensorflow.keras as tf
import keras

In [None]:
# Mount Google Drive at /content/gdrive; the dataset and the saved artefacts
# used below are read from and written to paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Load the Goodreads book table from Drive. The file is decoded as Latin-1.
books = pd.read_csv('/content/gdrive/MyDrive/books_recommender_chatbot/goodread_books.csv', encoding='Latin-1')
# Preview the first rows.
books.head()

Unnamed: 0.1,Unnamed: 0,title,original_title,series,language,authors,avg_rating,num_ratings,num_reviews,genres,description,url,weighted_rating
0,0,A Prayer for Owen Meany,A Prayer for Owen Meany,,English,John Irving,4.23,294351,14399,"Fiction,Classics,Contemporary,Literature,Novel...","Eleven-year-old Owen Meany, playing in a Littl...",https://www.goodreads.com/book/show/4473.A_Pra...,4.228549
1,1,The World According to Garp,The World According to Garp,,English,John Irving,4.09,210460,5716,"Fiction,Classics,Contemporary,Literature,Novel...","This is the life and times of T. S. Garp, the ...",https://www.goodreads.com/book/show/7069.The_W...,4.089581
2,2,Leaves of Grass,Leaves of Grass,Iowa Whitman Series,English,Walt Whitman,4.11,91893,2640,"Poetry,Classics,Fiction,Literature,Literature,...",A collection of quintessentially American poem...,https://www.goodreads.com/book/show/27494.Leav...,4.108536
3,3,The Firm,The Firm,,English,"Robin Waterfield (Adapter),John Grisham",4.03,588541,3604,"Fiction,Thriller,Mystery,Suspense,Mystery,Crim...","Adaptation for younger readers. Mitch McDeere,...",https://www.goodreads.com/book/show/5358.The_Firm,4.030096
4,4,The Last Battle,The Last Battle,The Chronicles of Narnia (Publication Order),English,C.S. Lewis,4.01,240167,7167,"Fantasy,Fiction,Classics,Young Adult,Childrens...",This edition of Lewis's classic fantasy fictio...,https://www.goodreads.com/book/show/84369.The_...,4.010436


In [None]:
# Remove the 'Unnamed: 0' column in place.
# NOTE(review): presumably a row index saved by an earlier to_csv call - confirm.
books.drop(columns='Unnamed: 0', inplace=True)

In [None]:
def clean_string(s):
    """Return the lower-cased tokens of ``s`` without stopwords or punctuation.

    Args:
        s: The text to process.

    Returns:
        A list of tokens, in their original order, produced by
        ``wordpunct_tokenize`` on the lower-cased text, with English stopwords
        and single punctuation characters removed.
    """
    # A set gives constant-time membership tests; the tokens kept are exactly
    # the ones the former list-based check kept.
    stop = set(stopwords.words('english')).union(string.punctuation)
    return [token for token in wordpunct_tokenize(s.lower()) if token not in stop]

In [None]:
def create_soup(x):
    """Build the space-separated "soup" of tokens describing one book row.

    The soup concatenates, in this order: keywords extracted from the
    description with RAKE, the cleaned title, the cleaned language, the
    cleaned series name, the authors and the genres. Fields that are missing
    (anything that is not a string, e.g. NaN) are skipped.

    Args:
        x: A row of the books table, indexable by the column names
            'description', 'title', 'language', 'series', 'authors' and
            'genres'.

    Returns:
        A single string with all non-empty parts joined by one space.
    """
    # How many times each field's tokens are repeated in the soup. Raising a
    # value increases that field's term counts relative to the others.
    title_importance = 1
    language_importance = 1
    series_importance = 1
    authors_importance = 1
    genres_importance = 1

    def is_text(value):
        # Missing cells arrive as float NaN (or None). An identity check
        # against np.nan only catches one particular NaN object, so test for
        # "is a string" instead.
        return isinstance(value, str)

    def joined_entries(raw, importance):
        # Comma-separated entries; the spaces inside each entry are removed so
        # a multi-word entry becomes a single token.
        entries = [entry.lower().replace(' ', '') for entry in raw.split(',')]
        return ' '.join(entries * importance)

    parts = []

    # Keywords from description.
    desc = x['description']
    if is_text(desc):
        rake = Rake()
        rake.extract_keywords_from_text(desc)
        parts.append(' '.join(list(rake.get_word_degrees().keys())))

    # Title.
    title = x['title']
    if is_text(title):
        parts.append(' '.join(clean_string(title) * title_importance))

    # Language.
    language = x['language']
    if is_text(language):
        parts.append(' '.join(clean_string(language) * language_importance))

    # Series.
    series = x['series']
    if is_text(series):
        parts.append(' '.join(clean_string(series) * series_importance))

    # Authors. Punctuation is deliberately kept so a "(Role)" suffix stays
    # attached to the author's name.
    authors = x['authors']
    if is_text(authors):
        parts.append(joined_entries(authors, authors_importance))

    # Genres. Same treatment as authors (spaces stripped to make matching a
    # bit more likely).
    genres = x['genres']
    if is_text(genres):
        parts.append(joined_entries(genres, genres_importance))

    # Empty parts are dropped so the soup never contains doubled separators
    # between fields.
    return ' '.join(part for part in parts if part)

In [None]:
# Build the soup string for every book; axis=1 passes each row to create_soup.
books['soup'] = books.apply(create_soup, axis=1)

In [None]:
books.head(3)

Unnamed: 0,title,original_title,series,language,authors,avg_rating,num_ratings,num_reviews,genres,description,url,weighted_rating,soup
0,A Prayer for Owen Meany,A Prayer for Owen Meany,,English,John Irving,4.23,294351,14399,"Fiction,Classics,Contemporary,Literature,Novel...","Eleven-year-old Owen Meany, playing in a Littl...",https://www.goodreads.com/book/show/4473.A_Pra...,4.228549,little league baseball game mother moments hit...
1,The World According to Garp,The World According to Garp,,English,John Irving,4.09,210460,5716,"Fiction,Classics,Contemporary,Literature,Novel...","This is the life and times of T. S. Garp, the ...",https://www.goodreads.com/book/show/7069.The_W...,4.089581,famous mother dark even hilarious evidence nov...
2,Leaves of Grass,Leaves of Grass,Iowa Whitman Series,English,Walt Whitman,4.11,91893,2640,"Poetry,Classics,Fiction,Literature,Literature,...",A collection of quintessentially American poem...,https://www.goodreads.com/book/show/27494.Leav...,4.108536,seminal work collection quintessentially ameri...


In [None]:
# Turn each book's soup into a vector of token counts (one row per book).
count = CountVectorizer()
X = count.fit_transform(books['soup'])

In [None]:
# Pairwise cosine similarity between all rows of X: cosine_sim[i][j] is the
# similarity between the soups of book i and book j.
cosine_sim = cosine_similarity(X, X)

In [None]:
# Reverse lookup: a Series indexed by book title whose values are the
# corresponding index labels of `books` (title -> index).
title_to_index = pd.Series(books.index, index=books['title'])
title_to_index.head()

title
A Prayer for Owen Meany        0
The World According to Garp    1
Leaves of Grass                2
The Firm                       3
The Last Battle                4
dtype: int64

In [None]:
# Small words that stay lower-case inside a title (never the first word).
ignore = ['a', 'an', 'the', 'am', 'is', 'are', 'and', 'of', 'in', 'on', 'with', 'from', 'to']


def title_case(text):
    """Return ``text`` converted to book-title capitalisation.

    The first word is always capitalised. Every later word is title-cased
    unless it is one of the small words in ``ignore``, which are written in
    lower case. Words are separated by single spaces in the result.

    Args:
        text: The raw title, in any casing.

    Returns:
        The re-cased title, or an empty string when ``text`` has no words.
    """
    cased = []
    # The first word is identified by position: comparing by value would also
    # treat a later repeat of that word (e.g. the second "the") as first.
    for position, word in enumerate(text.split()):
        if position == 0:
            cased.append(word.capitalize())
        elif word.lower() in ignore:
            # Compare case-insensitively so "OF" and "Of" stay small as well.
            cased.append(word.lower())
        else:
            cased.append(word.title())

    return ' '.join(cased)

In [None]:
title_case('to kill a mockingbird')

'To Kill a Mockingbird'

In [None]:
def get_recommendations(title):
    """Return the ten books whose soup is most similar to the given title.

    The title is normalised with ``title_case`` and looked up in
    ``title_to_index``. If that exact form is unknown, a case-insensitive
    match is attempted. The matched book's index, title and soup, followed by
    the ten best similarity scores, are printed.

    Args:
        title: The title of the book to start from, in any casing.

    Returns:
        The rows of ``books`` for the (at most) ten most similar other books,
        best match first.

    Raises:
        KeyError: If no book with that title exists.
    """
    title = title_case(title)
    try:
        idx = title_to_index[title]
    except KeyError:
        # title_case cannot reproduce every real title's capitalisation, so
        # fall back to a case-insensitive comparison before giving up.
        matches = title_to_index[title_to_index.index.str.lower() == title.lower()]
        if matches.empty:
            raise
        idx = matches

    # Several books sharing one title come back as a Series; use the first.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    print(f"{idx} {title}:\n{books.loc[idx]['soup']}")

    # Remove the queried book by its own position instead of assuming it is
    # the first entry after sorting (a book with an identical soup ties at the
    # top score and may sort ahead of it).
    scores = pd.Series(cosine_sim[idx]).drop(idx).sort_values(ascending=False)
    top_scores = scores.iloc[:10]

    print(top_scores)
    return books.iloc[list(top_scores.index)]

In [None]:
def title_search(key_word_phrase):
    """Return the books whose title contains the given phrase.

    The phrase is matched literally (not as a regular expression) and without
    regard to case. Runs of whitespace in the phrase are collapsed to single
    spaces. Rows with a missing title never match.

    Args:
        key_word_phrase: The word or phrase to look for in the titles.

    Returns:
        The matching rows of ``books``.
    """
    phrase = ' '.join(key_word_phrase.split())
    # regex=False keeps characters such as "(" or "?" from being interpreted
    # as a pattern; na=False turns missing titles into non-matches so the
    # mask stays boolean.
    mask = books['title'].str.contains(phrase, case=False, regex=False, na=False)
    return books.loc[mask]

In [None]:
get_recommendations('the firm')

3 The Firm:
mitch mcdeere harvard law graduate chicago mob figure operations memphis tax firm mysterious deaths adaptation obsessive office security younger readers becomes suspicious firm english robinwaterfield(adapter) johngrisham fiction thriller mystery suspense mystery crime thriller mysterythriller thriller legalthriller novels drama law
1983    0.407946
6984    0.392913
3838    0.392232
5294    0.384329
3461    0.382433
8709    0.380970
6436    0.380169
4541    0.378224
8169    0.376404
7304    0.376309
dtype: float64


Unnamed: 0,title,original_title,series,language,authors,avg_rating,num_ratings,num_reviews,genres,description,url,weighted_rating,soup
1983,The Chamber,The Chamber,,English,John Grisham,3.81,132855,1798,"Fiction,Thriller,Mystery,Thriller,Legal Thrill...",In the corridors of Chicago's top law firm: Tw...,https://www.goodreads.com/book/show/5355.The_C...,3.814387,run family lies adam client old hall stands yo...
6984,Rising Sun,Rising Sun,,English,Michael Crichton,3.66,50835,1010,"Fiction,Thriller,Mystery,Mystery,Crime,Suspens...",In a novel set within the arena of volatile Ja...,https://www.goodreads.com/book/show/7668.Risin...,3.678008,international electronics industry american re...
3838,Windmills of the Gods,Windmills of the Gods,,English,Sidney Sheldon,3.86,29610,656,"Fiction,Thriller,Mystery,Suspense,Thriller,Mys...",This classic best-selling thriller races from ...,https://www.goodreads.com/book/show/119389.Win...,3.874716,classic best destruction romance shady menace ...
5294,The Matarese Circle,The Matarese Circle,Matarese Dynasty,English,Robert Ludlum,4.05,42191,373,"Fiction,Thriller,Mystery,Spy Thriller,Espionag...",The Matarese killers will take over the world ...,https://www.goodreads.com/book/show/31231.The_...,4.050185,gq world within two years ... matarese killers...
3461,Rage of Angels,Rage of Angels,,English,Sidney Sheldon,3.95,35782,1185,"Fiction,Thriller,Mystery,Romance,Suspense,Thri...",A worldwide bestseller first published in 1980...,https://www.goodreads.com/book/show/43328.Rage...,3.956597,story two men one loved successful lawyer nove...
8709,The Third Option,The Third Option,Mitch Rapp,English,Vince Flynn,4.24,42384,1177,"Fiction,Thriller,Action,Spy Thriller,Espionage...","Mitch Rapp, CIA's top counterterrorism operati...",https://www.goodreads.com/book/show/184661.The...,4.229846,cia german industrialist notorious sponsors se...
6436,The Street Lawyer,The Street Lawyer,,English,John Grisham,3.85,106880,2607,"Fiction,Thriller,Mystery,Thriller,Legal Thrill...",Michael was in a hurry. He was scrambling up t...,https://www.goodreads.com/book/show/5351.The_S...,3.854537,streets mentally ill veteran giant time drake ...
4541,The King of Torts,The King of Torts,,English,John Grisham,3.7,80010,2271,"Fiction,Mystery,Thriller,Thriller,Legal Thrill...",The office of the public defender is not known...,https://www.goodreads.com/book/show/5356.The_K...,3.710453,training ground every week better job young ma...
8169,The Day After Tomorrow,The Day After Tomorrow,,English,Allan Folsom,4.02,6999,520,"Thriller,Fiction,Mystery,Suspense,Spy Thriller...",A thriller which weaves together three stories...,https://www.goodreads.com/book/show/124914.The...,4.028626,series horrific murders thriller international...
7304,A Stranger in the Mirror,A Stranger in the Mirror,,English,Sidney Sheldon,3.65,19893,488,"Fiction,Thriller,Mystery,Romance,Suspense,Thri...","Toby Temple is a superstar, the world's funnie...",https://www.goodreads.com/book/show/115130.A_S...,3.694052,world dark funniest man gets mysterious past s...


In [None]:
# Save the table, including the new 'soup' column, to Drive.
# NOTE(review): to_csv also writes the row index as an unnamed first column
# by default, which shows up as 'Unnamed: 0' when the file is read back -
# confirm that readers of this file expect it.
books.to_csv('/content/gdrive/MyDrive/books_recommender_chatbot/books.csv')

In [None]:
# Persist the similarity matrix. The context manager closes the file (and
# flushes it to Drive) even if pickling fails.
with open('/content/gdrive/MyDrive/books_recommender_chatbot/cosine_sim.pickle', 'wb') as cosine_sim_file:
    pickle.dump(cosine_sim, cosine_sim_file)

# Chatbot

In [None]:
def greetings():
    """Ask for the user's name and print Joyce's welcome message.

    Reads one line from standard input. When the user enters nothing (or only
    whitespace) the generic "user" greeting is printed; otherwise the greeting
    addresses the user by the name they typed.
    """
    print('Hey there! Welcome onboard! What is your name')
    bot_greetings = ['Howdy', 'Hi', 'Hello', 'Ola', 'Namaste', 'Wassup', 'Sup', 'Hey', 'Greetings']

    # input() always returns a string, never None, so "no name given" has to
    # be detected as an empty answer.
    user = input().strip()
    greeting = random.choice(bot_greetings)
    if not user:
        print(f'{greeting} user. My name is Joyce and I am here to recommend some books for you.\nIf you wish to exit, please type "quit"\nand if you wish to continue please type in the title of the book you want to get recommendations for.')
    else:
        print(f'{greeting} {user}. My name is Joyce and I am here to help recommend some books to you. If you wish to exit, please type "quit"\nand if you wish to continue please enter the title of the book you want to get recommendations for.')


In [None]:
def user_gratitude(user_input):
    """Return a polite reply if the user's message expresses thanks.

    The message is lower-cased and punctuation around each word is removed.
    It counts as thanks when any known phrase occurs in it as a run of whole
    words.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        A randomly chosen reply string, or None when the message contains no
        expression of thanks.
    """
    user_thanks = ["thanks", "thanks alot", "thank you", "that's helpful", "awesome joyce"]
    bot_reply = ["glad I could help!", "any time!", "my pleasure", "happy to help"]

    # Strip punctuation from the ends of every word so "thanks!" still counts;
    # inner characters such as the apostrophe in "that's" are left alone.
    words = [word.strip(string.punctuation) for word in user_input.lower().split()]
    # Pad with spaces so phrases only match on whole-word boundaries
    # ("thanksgiving" is not "thanks"). Matching phrases against the whole
    # message, rather than word by word, lets multi-word phrases match too.
    normalized = ' ' + ' '.join(word for word in words if word) + ' '

    if any(f' {phrase} ' in normalized for phrase in user_thanks):
        return random.choice(bot_reply)
    return None

In [None]:
def bot_response(user_input):
    """Return Joyce's reply to a book title typed by the user.

    Args:
        user_input: The title of the book to get recommendations for.

    Returns:
        A message listing the recommended books, or an apology when the title
        is unknown or no similar books were found.
    """
    try:
        recommendations = get_recommendations(user_input)
    except KeyError:
        # The title lookup raises KeyError for a book that is not in the
        # dataset; answer politely instead of ending the conversation with a
        # traceback.
        return 'My apologies, I could not find a book with that title. Please check the spelling and try again.'

    if recommendations.shape[0] == 0:
        return 'My apologies, I could not find any other books similar to the one you entered.'

    return f'Here is a list of all the books similar to the one you entered:\n {recommendations}'

In [None]:
# Start the conversation: greet the user, then answer messages until the
# user types one of the exit words.
greetings()

exit_list = ['exit', 'later', 'bye', 'quit', 'clear', 'break']

while True:
    user_input = input()
    if user_input.strip().lower() in exit_list:
        print('Was a pleasure serving you. Hope to see you again')
        break

    # Ask for the gratitude reply once: it is picked at random, so a second
    # call would produce a different answer from the one just checked.
    gratitude_reply = user_gratitude(user_input)
    if gratitude_reply is not None:
        print(f'Joyce: {gratitude_reply}')
    else:
        print(f'Joyce: {bot_response(user_input)}')

Hey there! Welcome onboard! What is your name
Susan
Namaste Susan. My name is Joyce and I am here to help recommend some books to you. If you wish to exit, please type "quit"
and if you wish to continue please enter the title of the book you want to get recommendations for.
to kill a mockingbird
1690 To Kill a Mockingbird:
pulitzer prize innocence experience takes readers book first published childhood sleepy southern town later made 1960 1961 harper lee always considered went dramatic mockingbird regional story became compassionate young alabama woman claims universal appeal translated human behavior deeply moving print also forty languages 18 million copies kill today simple love american literature humor classic instant bestseller pathos regarded crisis conscience rocked kindness masterpiece unforgettable novel roots win critical success hatred academy award cruelty winning film kill mockingbird english kill mockingbird harperlee classics fiction historical historicalfiction academi