In [1]:
'''
Book & Library Chatbot Assistant
'''
import nltk
import re
import pickle
import pandas as pd
import random
import numpy
from datetime import datetime
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
from collections import defaultdict
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Fetch the NLTK resources this notebook relies on. Each call returns True
# once the package is available locally (already-present packages are reported
# as up-to-date, not downloaded again).
# Tokenizer models; word_tokenize is called on user input in the chatbot.
nltk.download('punkt')
# Part-of-speech tagger; pos_tag is used to pick proper nouns out of a name reply.
nltk.download('averaged_perceptron_tagger')
# NOTE(review): stopwords are not referenced in the visible cells - confirm this is still needed.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\myaku\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\myaku\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\myaku\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
# Knowledge base: load the semicolon-separated book catalogue, skipping
# malformed rows, and discard the small/medium cover-image URL columns
# (only the large image URL is kept).
books_df = (
    pd.read_csv(
        "books.csv",
        sep=";",
        on_bad_lines='skip',
        encoding="latin-1",
        low_memory=False,
    )
    .drop(columns=["Image-URL-M", "Image-URL-S"])
)

'''
-----------------
GET BOOK TITLE FROM STRING IF POSSIBLE
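# The info argument can be any column that remains after import:
ISBN;"Book-Title";"Book-Author";"Year-Of-Publication";"Publisher";"Image-URL-L"
("Image-URL-S" and "Image-URL-M" are dropped when the knowledge base is loaded).
It can also be the string all (every field as text) or None (the raw row).
Default - return the matched Book-Title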
-----------------
'''
def find_book(text, info = 'Book-Title'):
    '''
    Return information about the first catalogue title found inside `text`.

    The match is a case-insensitive substring test of each `Book-Title` in
    `books_df` against `text`; titles of three characters or fewer are ignored
    so that very short titles do not match ordinary words.

    Parameters:
        text: free-form user text that may contain a book title.
        info: what to return for the matched row:
              - a column name (default 'Book-Title'): that single value
              - 'all': every column rendered as "column: value" lines
              - None: the matched row itself (a pandas Series)

    Returns:
        The requested value, or None when `text` is not a non-empty string or
        no title matches.
    '''
    if not isinstance(text, str) or not text:
        return None

    # Lowercase the haystack once instead of once per catalogue row.
    haystack = text.lower()

    for index, title in books_df["Book-Title"].items():
        # Missing titles are read as NaN (a float), which has no .lower().
        if not isinstance(title, str) or len(title) <= 3:
            continue
        if title.lower() not in haystack:
            continue

        # `index` is an index *label*, so always address the row with .loc.
        if info is None:
            return books_df.loc[index]
        if info == 'all':
            row = books_df.loc[index].to_dict()
            return "\n".join(f"{key}: {value}" for key, value in row.items())
        return books_df.loc[index, info]
    return None
'''
-----------------
CHECK IF ALL STRINGS ARE PRESENT IN TEXT
-----------------
'''
def check_all_present(string_list, text):
    '''
    Return True when every string in `string_list` occurs in `text`.

    The comparison is a case-insensitive substring test on both sides, the
    same rule check_any_present applies. An empty `string_list` yields True.
    '''
    haystack = text.lower()
    return all(string.lower() in haystack for string in string_list)
'''
-----------------
EXTRACT BOOK TITLE USING NAMED ENTITY RECOGNITION (NER)
-----------------
'''
def extract_title(text):
    '''
    Return the text of the first entity labelled "WORK_OF_ART" in `text`,
    or None when the document contains no such entity.
    '''
    # NOTE(review): `nlp` is not defined or imported in the visible cells -
    # presumably a spaCy pipeline; confirm it is created before this is called.
    work_titles = (
        entity.text
        for entity in nlp(text).ents
        if entity.label_ == "WORK_OF_ART"
    )
    return next(work_titles, None)
'''
-----------------
CHECK IF ANY STRINGS ARE PRESENT IN TEXT GIVEN LIST
-----------------
'''
def check_any_present(string_list, text):
    '''
    Return True when at least one string in `string_list` occurs in `text`.

    Both sides are lowercased before the substring test. An empty
    `string_list` yields False.
    '''
    return any(candidate.lower() in text.lower() for candidate in string_list)

'''
-----------------
RETURN RANDOM BOOK
-----------------
'''
def get_random_book():
    '''
    Pick one row of `books_df` uniformly at random and return it as text,
    one "column: value" line per column.
    '''
    last_position = len(books_df) - 1
    chosen_row = books_df.iloc[random.randint(0, last_position)]

    lines = []
    for column, value in chosen_row.to_dict().items():
        lines.append(f"{column}: {value}")
    return "\n".join(lines)
'''
-----------------
RECOMMEND BOOK BASED ON RATINGS USING RANDOM FOREST REGRESSION
-----------------
'''
def recommend_book(book_ratings):
    '''
    Recommend a book title from the user's ratings.

    A RandomForestRegressor is trained on (author, year of publication) ->
    rating for the rated titles that exist in `books_df`. It then scores the
    whole catalogue, and one of the 20 highest-scoring books is returned at
    random.

    Parameters:
        book_ratings: dict mapping book title -> rating. Ratings may be
            numbers or numeric text; entries that cannot be read as a number
            are ignored.

    Returns:
        The recommended title, lowercased (titles are lowercased for matching).

    Raises:
        ValueError: when there are no usable ratings, or none of the rated
            titles exist in the catalogue, so there is nothing to train on.
    '''
    if not book_ratings:
        raise ValueError("Cannot recommend a book without any ratings.")

    #Convert the user dictionary to a dataframe
    user_df = pd.DataFrame.from_dict(book_ratings, orient='index', columns=['ratings'])
    user_df = user_df.rename_axis('Book-Title').reset_index().rename(columns={'ratings': 'rating'})
    user_df['Book-Title'] = user_df['Book-Title'].str.lower()

    # Ratings may have been stored as text; coerce and drop unreadable ones.
    user_df['rating'] = pd.to_numeric(user_df['rating'], errors='coerce')
    user_df = user_df.dropna(subset=['rating'])

    #Work on a lowercased copy of the catalogue; rows without a title cannot be recommended
    books_df_copy = books_df.dropna(subset=['Book-Title']).copy()
    books_df_copy['Book-Title'] = books_df_copy['Book-Title'].str.lower()
    # Give missing authors a placeholder so the label encoder only sees strings.
    books_df_copy['Book-Author'] = books_df_copy['Book-Author'].fillna('unknown').astype(str).str.lower()

    # Replace non-numeric years with the median year. Assign the result back:
    # a chained inplace fillna may act on a temporary and leave the frame unchanged.
    publication_years = pd.to_numeric(books_df_copy['Year-Of-Publication'], errors='coerce')
    books_df_copy['Year-Of-Publication'] = publication_years.fillna(publication_years.median())

    #Merge the two dataframes together
    merged_df = pd.merge(user_df, books_df_copy, on='Book-Title')
    merged_df = merged_df.loc[:, ['Book-Title', 'Book-Author', 'Year-Of-Publication', 'rating']].copy()
    if merged_df.empty:
        raise ValueError("None of the rated titles were found in the book catalogue.")

    #Encode book authors as a numeric value with a label encoder
    label_encoder_author = LabelEncoder()
    label_encoder_author.fit(books_df_copy["Book-Author"].unique())
    merged_df["Book-Author"] = label_encoder_author.transform(merged_df["Book-Author"])

    #X, Y Split and model fitting
    X = merged_df[["Book-Author", "Year-Of-Publication"]]
    y = merged_df["rating"]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    #Score every catalogue entry on the same two features
    books_df_copy["Book-Author"] = label_encoder_author.transform(books_df_copy["Book-Author"])
    X_new = books_df_copy[["Book-Author", "Year-Of-Publication"]]
    books_df_copy["predicted_rating"] = model.predict(X_new)

    # Do not recommend a book the user has already rated, unless nothing else is left.
    already_rated = set(user_df['Book-Title'])
    candidates = books_df_copy[~books_df_copy['Book-Title'].isin(already_rated)]
    if candidates.empty:
        candidates = books_df_copy

    # Randomly return a book title from the top 20 predicted values
    top_20_books = candidates.nlargest(20, 'predicted_rating')
    random_book = top_20_books.sample()
    return random_book['Book-Title'].values[0]

In [3]:
'''
-----------------
FUNCTION TO INITIATE CHAT GIVEN THE BOT AND A USER
-----------------
'''
def interact_with_chatbot(assistant, user_id):
    '''
    Run an interactive console chat between `user_id` and `assistant`.

    Each line is passed to `assistant.respond(user_id, line)` and the reply
    is printed. The loop ends when the user types 'quit' (any case, with
    surrounding whitespace ignored) or when input is closed or interrupted.

    `assistant.save_user_data()` is always called on the way out, including
    when `respond` raises, so ratings gathered in the session are not lost.
    '''
    print("Welcome to the Personal Book Librarian!")
    print("Type 'quit' to exit.")

    try:
        #Loop between user input and chatbot responses, exit and save on quit.
        while True:
            try:
                user_input = input(f"{user_id}: ")
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or an interrupted kernel is treated like 'quit'.
                break
            if user_input.strip().lower() == 'quit':
                break

            response = assistant.respond(user_id, user_input)
            print(f"\nLibrarian Bot: {response}\n")
    finally:
        assistant.save_user_data()
    print("Goodbye!")

In [4]:
'''
-----------------
UserModel - CONTAINS USER DATA TO BE STORED
-----------------
'''
class UserModel:
    '''
    Per-user state that the chatbot stores between sessions.

    Attributes set by the constructor:
        name            - the name passed in
        book_ratings    - dict, initially empty
        name_prompt, add_book_prompt, rating_prompt - flags, initially False
    '''
    def __init__(self, name):
        self.name = name
        self.book_ratings = dict()
        # All prompt flags start cleared (assigned left to right).
        self.name_prompt = self.add_book_prompt = self.rating_prompt = False

In [5]:
'''
-----------------
PersonalLibrarian - CONTAINS LIBRARY CHATBOT FUNCTIONS AND METHODS
-----------------
'''
class PersonalLibrarian:
    '''
    Rule-based library chatbot.

    Keeps one UserModel per user id in `self.user` (a dict persisted with
    pickle at `user_data_file`) and answers free-text requests in `respond`
    by keyword matching: naming the user, storing and listing book ratings,
    random books, catalogue lookups, greetings, recommendations and help.
    '''

    # Whole-word, case-insensitive triggers. A plain substring test would find
    # 'hi' inside "something" or 'rate' inside "separate".
    _GREETING_PATTERN = re.compile(r'\b(?:hello|hi|greetings)\b', re.IGNORECASE)
    _RATE_PATTERN = re.compile(r'\brate[ds]?\b', re.IGNORECASE)

    # -----------------
    # CHATBOT INITIALIZATION
    # -----------------
    def __init__(self, user_data_file):
        '''Remember the pickle path and load any previously saved users.'''
        self.user_data_file = user_data_file
        self.user = self.load_user_data()

    # -----------------
    # USER DATA FUNCTIONS
    # -----------------
    def load_user_data(self):
        '''
        Return the saved {user_id: UserModel} dict, or an empty dict when the
        file does not exist or is empty.
        '''
        try:
            with open(self.user_data_file, 'rb') as f:
                # SECURITY: pickle.load can execute arbitrary code; only load
                # data files that this notebook wrote itself.
                return pickle.load(f)
        except (FileNotFoundError, EOFError):
            return {}

    def save_user_data(self):
        '''Write the current user dict to `user_data_file` with pickle.'''
        with open(self.user_data_file, 'wb') as f:
            pickle.dump(self.user, f)

    def get_user_model(self, user_id):
        '''Return the UserModel for `user_id`, creating one named "unknown" if needed.'''
        if user_id not in self.user:
            self.user[user_id] = UserModel("unknown")
        return self.user[user_id]

    # -----------------
    # CHAT RESPONSE HANDLING FUNCTIONS
    # -----------------
    def respond(self, user_id, text):
        '''
        Produce the bot's reply to `text` from `user_id`.

        Pending prompts (name, manual title/rating entry) are answered first;
        after that the first matching keyword rule wins, in the order written
        below.

        Returns:
            The reply as a string.
        '''
        #LOAD USER DATA
        user_model = self.get_user_model(user_id)
        lowered = text.lower()

        #CHANGE NAME - The previous reply asked for a name, so this text is the answer
        if user_model.name_prompt:
            return self._handle_name_reply(user_model, text)

        # NAME PROMPT IF DNE - If user doesn't have a name set the flag
        if user_model.name == 'unknown':
            user_model.name_prompt = True
            return f"Hello!, I've noticed I don't know your name in my knowledge base. Could you provide your first name so I know how to address you?"

        # BOOK PROMPT IF TITLE NONE - The previous reply asked for "title, rating"
        if user_model.add_book_prompt:
            return self._handle_manual_rating(user_model, text)

        #CHANGE NAME - Change the user's name
        if check_all_present(['change', 'name'], text):
            user_model.name_prompt = True
            return f'Alright, let me know what you want to change your name to'

        #DETECT AND STORE RATINGS - When the user is giving their review for a book/rating.
        if self._RATE_PATTERN.search(text):
            return self._handle_rating(user_model, text)

        #PRINT RATINGS FOR CATEGORY
        if "ratings" in lowered:
            response = f"{user_model.name}, your ratings for books are:\n"
            for item, rating in user_model.book_ratings.items():
                response += f"- {item}: {rating}/10\n"
            return response

        #RETURN A RANDOM BOOK FROM DATAFRAME
        if check_any_present(['random'], text):
            return f"Here's a random book I've chosen from my knowledge base: \n{get_random_book()}"

        #RETURN A LOOKUP BY TITLE
        wants_lookup = (
            check_any_present(['lookup', 'search', 'find', 'show', 'author', 'isbn'], text)
            or check_all_present(['look', 'up'], text)
            or check_all_present(['tell', 'about'], text)
            or check_all_present(['what', 'is'], text)
        )
        if wants_lookup:
            return self._handle_lookup(text)

        #HELLO RESPONSE
        if self._GREETING_PATTERN.search(text):
            return f"Hello, {user_model.name}! How can I help you today?"

        #RECOMMEND A BOOK
        if 'recommend' in lowered:
            # The recommender needs at least one rating to train on.
            if not user_model.book_ratings:
                return f"I don't have any book ratings from you yet, {user_model.name}. Rate a few books first and I'll recommend one."
            try:
                recommended_title = recommend_book(user_model.book_ratings)
            except ValueError:
                # Raised when no rated title can be used for training.
                return f"Sorry {user_model.name}, I couldn't build a recommendation from your current ratings. Try rating a few more books from my knowledge base."
            return f"Here is the book I recommend based on your ratings:\n{find_book(recommended_title, 'all')}"

        #DOCUMENTATION
        if check_any_present(['documentation', 'docs', 'help', 'tutorial'], text):
            return (
                f"Here's a list of some of my core functions with some examples:\n"
                f"Change what I call you | I'd like to change my name\n"
                f"Tell me how you liked or disliked books you've read | I rate To Kill a Mockingbird by Harper Lee a 5/10.\n"
                f"Have me tell you your ratings | I would like to see my ratings.\n"
                f"Ask me to recommend you a random book to read | Give me a random book to read\n"
                f"Ask me about the details of a book | tell me about the ISBN/author/image/date of old yeller\n"
                f"Ask me to recommend you a book based on your ratings"
            )

        #CONFUSED RESPONSE
        return f"I'm not sure what you're asking, {user_model.name}. Could you please rephrase?\nIf you need documentation let me know"

    # -----------------
    # PRIVATE HELPERS
    # -----------------
    def _handle_name_reply(self, user_model, text):
        '''Store the user's name from their reply; the first two proper nouns are preferred.'''
        cleaned = text.strip()
        if not cleaned:
            # Keep the prompt flag set so the next message is read as the name.
            return "I didn't catch that. Could you provide your first name so I know how to address you?"

        tagged_text = pos_tag(word_tokenize(cleaned))
        proper_nouns = [word.capitalize() for word, pos in tagged_text if pos == 'NNP']
        if proper_nouns:
            # First proper noun is the first name, the second (if any) the last name.
            user_model.name = " ".join(proper_nouns[:2])
        else:
            # No proper noun was tagged, so take the reply as typed.
            user_model.name = cleaned
        user_model.name_prompt = False
        return f'Alright, i\'ve set your name as {user_model.name}'

    def _handle_manual_rating(self, user_model, text):
        '''Parse a "title, rating" reply and store it; ask again if it cannot be parsed.'''
        # Split on the LAST comma so titles that contain commas still work.
        title, separator, rating_text = text.rpartition(",")
        title = title.strip()
        rating = self._parse_rating(rating_text)
        if not separator or not title or rating is None:
            return f"Sorry, that follows in incorrect input format. Please enter the book title and rating separated by a comma."

        user_model.book_ratings[title.lower()] = rating
        user_model.add_book_prompt = False
        return f"Alright {user_model.name}, I've added the book: {title} to your ratings with a score of {rating}"

    def _handle_rating(self, user_model, text):
        '''Store a rating given in free text, or fall back to the "title, rating" prompt.'''
        title = find_book(text)
        rating = self._extract_rating(text)

        if title is not None and rating is not None:
            user_model.book_ratings[title.lower()] = rating
            return f"Alright {user_model.name}, I've added {title.lower()} to your book ratings with a rating of {rating}"

        user_model.add_book_prompt = True
        return f"I'm having trouble determining the title or rating, could you give me the title of the book and rating split by a comma?"

    def _handle_lookup(self, text):
        '''Answer a catalogue lookup with a single scan of the knowledge base.'''
        record = find_book(text, None)
        if record is None:
            return "Sorry, I couldn't find a book in my knowledge base that matches your request."

        title = record['Book-Title']
        lowered = text.lower()
        if 'isbn' in lowered:
            return f"The ISBN I found for {title} is: {record['ISBN']}"
        if 'author' in lowered:
            return f"The Author I found for {title} is: {record['Book-Author']}"
        if check_any_present(['picture', 'image', 'pic', 'cover', 'photo', 'graphic'], text):
            return f"The Picture I found for {title} is: {record['Image-URL-L']}"
        if check_any_present(['when', 'time', 'date', 'published'], text):
            return f"The Year of Publication I found for {title} is: {record['Year-Of-Publication']}"

        details = "\n".join(f"{key}: {value}" for key, value in record.to_dict().items())
        return f"Here's all the details for {title}: \n{details}"

    @staticmethod
    def _parse_rating(rating_text):
        '''Read "8" or "8/10" (surrounding whitespace allowed) as an int, else None.'''
        match = re.fullmatch(r'\s*(\d+)\s*(?:/\s*10)?\s*', rating_text)
        return int(match.group(1)) if match else None

    @staticmethod
    def _extract_rating(text):
        '''
        Find the rating in a free-text sentence.

        An explicit "N/10" wins; otherwise the first number after the word
        "rate" is used. Returns None when neither is present.
        '''
        out_of_ten = re.search(r'(\d+)\s*/\s*10\b', text)
        if out_of_ten:
            return int(out_of_ten.group(1))
        after_rate = re.search(r'rate.*?(\d+)', text, re.IGNORECASE)
        return int(after_rate.group(1)) if after_rate else None

In [None]:
# Each user's data lives in a pickle file named after the user id.
user_id = "Matthew Y"
user_data_file = f"{user_id}.pkl"
assistant = PersonalLibrarian(user_data_file)

# Example ratings and requests to try in the chat:
#- the giver: 9/10
#- ender's game: 10/10
#- to kill a mockingbird: 7/10
#- old yeller: 4/10
#I rate To Kill a Mockingbird by Harper Lee a 5/10.
#find the ISBN of to kill a mockingbird
interact_with_chatbot(assistant, user_id)

Welcome to the Personal Book Librarian!
Type 'quit' to exit.
Matthew Y: recommend

Librarian Bot: Here is the book I recommend based on your ratings:
ISBN: 3442446937
Book-Title: Tage der Unschuld.
Book-Author: Richard North Patterson
Year-Of-Publication: 2000
Publisher: Goldmann
Image-URL-L: http://images.amazon.com/images/P/3442446937.01.LZZZZZZZ.jpg

Matthew Y: hello

Librarian Bot: Hello, Matthew! How can I help you today?

