# CM2015 Midterm Project: Library Assistant
This chatbot is a virtual library assistant. It helps users query opening hours, check book availability, and borrow or return books, and it personalizes the conversation by remembering the user's name.

## Download

In [1]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.


## Import

In [93]:
import json
import random
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

### Download NLTK data packs

In [106]:
# Data packs needed by word_tokenize, pos_tag and WordNetLemmatizer.
# NLTK 3.9+ looks up the tokenizer and tagger models under the new names
# "punkt_tab" and "averaged_perceptron_tagger_eng", while older releases use
# "punkt" and "averaged_perceptron_tagger"; fetching both sets keeps the
# notebook working on either version.
for _resource in (
    "punkt",
    "punkt_tab",
    "wordnet",
    "omw-1.4",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
):
    nltk.download(_resource, quiet=True)

# Single shared lemmatizer instance used by preprocess_input.
lemmatizer = WordNetLemmatizer()

## Advanced function

### Memory system

In [136]:
def replace_with_memory(template: str, memory_data: dict) -> str:
    """
    Fill the placeholders of a response template with remembered values.

    A placeholder is a memory key wrapped in curly braces, e.g. ``{name}``.
    Keys whose placeholder does not occur in the template are skipped.

    Args:
        template (str): The template string containing placeholders.
        memory_data (dict): Maps each placeholder name to its replacement text.

    Returns:
        str: The template with every known placeholder substituted.
    """
    filled = template
    for name, value in memory_data.items():
        placeholder = f"{{{name}}}"
        if placeholder not in filled:
            continue
        filled = filled.replace(placeholder, value)

    return filled

In [117]:
def capture_information(match_data: re.Match, key: str, memory: dict) -> None:
    """
    Store the first captured group of a regex match in the memory dictionary.

    Nothing is stored (and any existing value is kept) when there is no match,
    when the pattern has no capture group, or when the first group is empty
    or did not take part in the match.

    Args:
        match_data (re.Match): The regex match object containing captured groups.
        key (str): The key under which the information should be stored in memory.
        memory (dict): The dictionary that stores the captured info.

    Returns:
        None
    """
    if match_data is None or not match_data.groups():
        return

    captured = match_data.group(1)
    # An optional group that did not participate yields None; storing it would
    # later break str.replace() when the value is inserted into a response.
    if captured is None:
        return

    captured = captured.strip()
    if captured:
        memory[key] = captured

### Book list

In [118]:
def update_books(path: str, data: dict) -> None:
    """
    Writes the current state of the books data back to the JSON file.

    Only the ``"books"`` list is persisted, in compact form (no spaces after
    separators). Serialization is delegated to the ``json`` module so that
    quotes, backslashes, booleans, ``None`` and an empty catalog all produce
    valid JSON.

    Args:
        path (str): The file path where the JSON data should be saved.
        data (dict): The dictionary containing the book data to be saved.

    Returns:
        None

    Raises:
        KeyError: If ``data`` has no ``"books"`` entry.
        TypeError: If a book contains a value that JSON cannot represent.
    """
    # Serialize before opening the file so a serialization error cannot leave
    # a truncated, half-written catalog behind.
    text = json.dumps(
        {"books": data["books"]},
        ensure_ascii=False,
        separators=(",", ":"),
    )

    with open(path, 'w', encoding="utf-8") as f:
        f.write(text)

In [119]:
def check_availability(book: dict) -> str:
    """
    Report whether at least one copy of the book can currently be borrowed.

    The number of free copies is the total quantity minus the loaned quantity.

    Args:
        book (dict): The dictionary object representing a single book.

    Returns:
        str: A sentence stating either how many copies are in the library or
            that every copy is loaned out.
    """
    copies_left = book['total_quantity'] - book['loaned_quantity']
    if copies_left > 0:
        message = f"Yes, we have '{book['title']}'. There are {copies_left} copies in library."
    else:
        message = f"We have '{book['title']}', but all copies are currently loaned out."
    return message

In [120]:
def query_author(book: dict) -> str:
    """
    Retrieves the author of the specified book.

    Args:
        book (dict): The dictionary object representing a single book.

    Returns:
        str: A sentence naming the author, or an apology when the book record
            is empty or carries no author.
    """
    # Without a record there is no title to quote, so answer generically
    # instead of indexing into an empty/None book.
    if not book:
        return "Sorry, I don't know the author for that book."

    author = book.get("author")
    if author:
        return f"The author of '{book['title']}' is {author}."
    return f"Sorry, I don't know the author for '{book['title']}'."

In [123]:
def list_books(books_data: dict) -> str:
    """
    Generates a list of all book titles currently in the catalog.

    Args:
        books_data (dict): The entire books dataset.

    Returns:
        str: A header line followed by one title per line (separated by
            ",\\n"), or an apology when the catalog is empty.
    """
    titles = [b["title"] for b in books_data["books"]]
    # An empty catalog gets its own answer rather than a header with nothing
    # under it.
    if not titles:
        return "Sorry, there are no books in our catalog right now."
    return "Here are the books we have:\n" + ",\n".join(titles)

In [124]:
def borrow_book(book: dict, books_data: dict) -> str:
    """
    Lend out one copy of the book when a copy is still on the shelf.

    On success the book's ``loaned_quantity`` is incremented in place.

    Args:
        book (dict): The dictionary object representing a single book.
        books_data (dict): The entire books dataset (not used by this function).

    Returns:
        str: A success or failure message based on stock availability.
    """
    # Guard clause: every copy is already out on loan.
    if not book["loaned_quantity"] < book["total_quantity"]:
        return f"Sorry, '{book['title']}' is currently out of stock."

    book["loaned_quantity"] += 1
    return f"Success! You have borrowed '{book['title']}'."

In [126]:
def return_book(book: dict, books_data: dict) -> str:
    """
    Take back one copy of the book when at least one copy is out on loan.

    On success the book's ``loaned_quantity`` is decremented in place.

    Args:
        book (dict): The dictionary object representing a single book.
        books_data (dict): The entire books dataset (not used by this function).

    Returns:
        str: A success message or a warning if the book wasn't loaned.
    """
    # Guard clause: nothing is on loan, so there is nothing to return.
    if not book['loaned_quantity'] > 0:
        return f"That's strange. All copies of '{book['title']}' are already in the library. You sure this book is from out library?"

    book['loaned_quantity'] -= 1
    return f"Thank you for returning '{book['title']}'."

In [131]:
def _find_book(title: str, books: list):
    """Return the catalog entry best matching ``title``, or None if none does."""
    wanted = title.lower()
    # An exact (case-insensitive) title wins over a partial one, so a short
    # title is not shadowed by an earlier book that merely contains it.
    for book in books:
        if book["title"].lower() == wanted:
            return book
    for book in books:
        if wanted in book["title"].lower():
            return book
    return None


def get_response_of_books(tag: str, user_input: str, match: re.Match, books_data: dict) -> str:
    """
    Router function to handle book-specific intents.

    ``list_books`` is answered straight from the catalog and needs no title.
    Every other tag looks up the title captured by the first regex group
    (surrounding whitespace and sentence punctuation removed) and forwards the
    matching book to its handler.

    Args:
        tag (str): The intent tag.
        user_input (str): The user input (not used by this function).
        match (re.Match): The regex match object containing the book title capture group.
        books_data (dict): The entire books dataset.

    Returns:
        str: The response generated by the specific book handler function, a
            "not found" message, or the default fallback message.
    """
    default_message = "I'm sorry. I didn't understand. Please say again."

    # Listing the catalog must not depend on a title being captured or found.
    if tag == "list_books":
        return list_books(books_data)

    groups = match.groups() if match is not None else ()
    raw_title = groups[0] if groups else None
    # Drop whitespace and sentence punctuation such as a trailing "?".
    title = (raw_title or "").strip(" \t\r\n?.!,;:'\"")
    # A blank title would substring-match the first book in the catalog.
    if not title:
        return default_message

    found_book = _find_book(title, books_data["books"])
    if found_book is None:
        return f"Sorry, I couldn't find a book named '{title}' in our catalog."

    if tag == "check_availability":
        return check_availability(found_book)
    elif tag == "query_author":
        return query_author(found_book)
    elif tag == "borrow_book":
        return borrow_book(found_book, books_data)
    elif tag == "return_book":
        return return_book(found_book, books_data)

    # default message
    return default_message

## Basic function

In [129]:
def get_wordnet_pos(treebank_tag: str) -> str:
    """
    Maps NLTK POS tags to WordNet POS tags.

    Only the first letter of the tag is inspected: ``J`` maps to adjective,
    ``V`` to verb and ``R`` to adverb. Every other tag (including nouns and
    an empty tag) maps to noun.

    Args:
        treebank_tag (str): The POS tag returned by nltk.pos_tag.

    Returns:
        str: The corresponding WordNet POS constant.
    """
    by_initial = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'R': wordnet.ADV,
    }
    # Slicing instead of indexing keeps an empty tag from raising IndexError.
    return by_initial.get(treebank_tag[:1], wordnet.NOUN)

In [52]:
def load_json(path: str) -> dict:
    """
    Loads data from a json file.
    Args:
        path (str): The file path to the json file.
    Returns:
        dict: A dictionary convert from the json file
    """
    try:
        with open(path, 'r', encoding="utf-8") as f:
            data = json.load(f)
        return data
    except:
        print(f"Error: {path} not found.")
        return None

In [133]:
def preprocess_input(user_input: str) -> str:
    """
    Normalise raw text into a space-separated string of lemmas.

    Pipeline:
    1. Lower-case the text.
    2. Drop every character that is neither a word character nor whitespace.
    3. Tokenize with NLTK.
    4. POS-tag the tokens.
    5. Lemmatize each token using its (possibly overridden) POS.

    The intermediate results of steps 2-4 are printed for inspection.

    Args:
        user_input (str): The user input.

    Returns:
        str: A string of lemmatized tokens which separate with space.
    """
    clean_input = re.sub(r'[^\w\s]', '', user_input.lower())
    print(f"Clean input: {clean_input}")

    tokens = nltk.word_tokenize(clean_input)
    print(f"Tokens: {tokens}")

    pos_tags = nltk.pos_tag(tokens)
    print(f"POS_tags: {pos_tags}")

    # Words whose POS is forced to verb instead of trusting the tagger's tag.
    forced_pos = {
        "borrowing": wordnet.VERB,
        "loaned": wordnet.VERB
    }

    lemmas = [
        lemmatizer.lemmatize(
            token,
            pos=forced_pos[token] if token in forced_pos else get_wordnet_pos(tag),
        )
        for token, tag in pos_tags
    ]

    return " ".join(lemmas)

In [134]:
# Conversation memory shared by get_response and the test cases. It is only
# created when no earlier cell has defined it, so re-running this cell does
# not wipe what the bot already remembers.
if "user_memory" not in globals():
    user_memory = {}


def get_response(user_input: str, intents_data: dict, books_data: dict) -> str:
    """
    Determines the appropriate response based on the user's input.

    Each pattern of each intent is tried in order. A pattern matches either as
    a regex against the raw input, or "vaguely" when its preprocessed form is
    a substring of the preprocessed input. The first matching pattern decides
    the answer:

    * For the ``provide_name`` intent a regex match first stores the captured
      name in ``user_memory``.
    * Book intents with a regex match are delegated to get_response_of_books.
    * Anything else gets a random response of the intent, with ``{key}``
      placeholders filled from ``user_memory``.

    Args:
        user_input (str): The user input.
        intents_data (dict): The dictionary loaded from intents.json.
        books_data (dict): The dictionary loaded from books.json.

    Returns:
        str: The final response string to be displayed to the user.
    """
    book_tags = {"check_availability", "query_author", "list_books", "borrow_book", "return_book"}
    preprocessed_input = preprocess_input(user_input)

    for intent in intents_data["intents"]:
        tag = intent["tag"]
        for pattern in intent["patterns"]:
            match = re.search(pattern, user_input, re.IGNORECASE)

            if tag == "provide_name" and match:
                capture_information(match, "name", user_memory)

            if match and tag in book_tags:
                return get_response_of_books(tag, user_input, match, books_data)

            vague_match = False
            if not match:
                # Only preprocess the pattern when the regex itself failed.
                vague_pattern = preprocess_input(pattern)
                # A pattern that normalises to "" is a substring of every
                # input and must not count as a match.
                vague_match = bool(vague_pattern) and vague_pattern in preprocessed_input

            if match or vague_match:
                response = random.choice(intent["responses"])
                return replace_with_memory(response, user_memory)

    # default message
    return "I'm sorry. I didn't understand. Please say again."

## Main loop

In [135]:
def chatbot_main() -> None:
    """
    The main execution loop for the Library Chatbot.

    Steps:
    1. Loads intents and book data.
    2. Prints the welcome message.
    3. Enters a loop to accept user input until exit commands
       ("bye", "quit", "exit"), end of input, or Ctrl-C.
    4. Saves book data when exit.

    If the intents cannot be loaded the function returns immediately. If the
    catalog cannot be loaded the chat runs with an empty catalog and nothing
    is written back on exit, so a broken books.json is never overwritten.

    Returns:
        None
    """
    books_path = "./books.json"
    intents_data = load_json("./intents.json")
    books_data = load_json(books_path)

    catalog_loaded = isinstance(books_data, dict) and "books" in books_data
    if not catalog_loaded:
        # Keep the shape the book handlers expect (a dict with a "books" list).
        books_data = {"books": []}
    if not intents_data:
        return

    print(intents_data["welcome_message"])
    print("==================================")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Treat closed input / Ctrl-C as a normal exit so loans are saved.
            print()
            user_input = "bye"

        if user_input.strip().lower() in ("bye", "quit", "exit"):
            print(intents_data["exit_message"])
            if catalog_loaded:
                update_books(books_path, books_data)
            break

        response = get_response(user_input, intents_data, books_data)
        print(f"Library Chatbot: {response}")

## Execute

In [44]:
# Start the interactive chatbot only when this module is run as the main
# program (as opposed to being imported).
if __name__ == "__main__":
    chatbot_main()

Library Chatbot: Hello! I am your personal library assistant. What can I help you.
Type 'bye' or 'quit' to exit.


You:  i want to borrow Clean Code


Library Chatbot: Success! You have borrowed 'Clean Code'.


You:  i want to return Clean Code


Library Chatbot: Thank you for returning 'Clean Code'.


You:  bye


Library Chatbot: Goodbye! Happy reading!


## Test Case

In [78]:
# Fixtures shared by the test cases below:
# - intents_test: the intents loaded from intents.json (None if loading fails).
# - books_test: an in-memory catalog holding a single book with 5 copies and
#   none on loan, passed to get_response as the books dataset.
intents_test = load_json("./intents.json")
books_test = {
    "books": [
        {
            "book_id": "T001",
            "title": "Test Book",
            "author": "Tester",
            "total_quantity": 5,
            "loaned_quantity": 0
        }
    ]
}

### Case 1: Context & Memory
This case tests the memory functionality.

In [68]:
def test_case_1() -> bool:
    """
    Check that introducing oneself stores the name in ``user_memory``.

    Returns:
        bool: True when "Alice" was stored under the "name" key, else False.
    """
    print("Test Case 1: Context & Memory")
    # Forget any name left over from an earlier run so that a stale value
    # cannot make the test pass without the capture actually working.
    user_memory.pop("name", None)

    # simulate first input
    user_input = "My name is Alice"
    get_response(user_input, intents_test, books_test)

    # test whether memorized or not
    print(f"Input: '{user_input}'")
    stored_name = user_memory.get("name")
    if stored_name == "Alice":
        print("Result: PASS (Memory stored 'Alice')")
        return True
    else:
        print(f"Result: FAIL (Memory is {stored_name})")
        return False

In [69]:
test_case_1()

Test Case 1: Context & Memory
Input: 'My name is Alice'
Result: PASS (Memory stored 'Alice')


True

### Case 2: NLP Preprocessing
This case tests the lemmatization step of the preprocessing.

In [111]:
def test_case_2() -> bool:
    """
    Check that preprocessing reduces "borrowing" to its lemma "borrow".

    Returns:
        bool: True when the processed text contains "borrow" but no longer
            contains "borrowing", else False.
    """
    print("Test Case 2: NLP Preprocessing (Lemmatization)")
    raw_user_input = "What are the borrowing limits?"
    processed_user_input = preprocess_input(raw_user_input)
    print(f"Raw Input: '{raw_user_input}'")
    print(f"Processed: '{processed_user_input}'")

    # The inflected form must be gone and the base form present.
    lemma_present = "borrow" in processed_user_input
    inflection_gone = "borrowing" not in processed_user_input
    passed = lemma_present and inflection_gone

    print(
        "Result: PASS (Lemmatization worked: borrowing -> borrow)"
        if passed
        else "Result: FAIL (Lemmatization check failed)"
    )
    return passed

In [114]:
test_case_2()

Test Case 2: NLP Preprocessing (Lemmatization)
Clean input: what are the borrowing limits
Tokens: ['what', 'are', 'the', 'borrowing', 'limits']
POS_tags: [('what', 'WDT'), ('are', 'VBP'), ('the', 'DT'), ('borrowing', 'NN'), ('limits', 'NNS')]
Raw Input: 'What are the borrowing limits?'
Processed: 'what be the borrow limit'
Result: PASS (Lemmatization worked: borrowing -> borrow)


True

### Case 3: Book Transaction Logic
This case simulates borrowing a book through the chatbot and checks that the book's loaned quantity increases by one.

In [88]:
def test_case_3() -> bool:
    """
    Check that a borrow request increases the book's loaned quantity by one.

    The request goes through get_response, so the shared ``books_test``
    fixture is modified by this test.

    Returns:
        bool: True when the loaned quantity grew by exactly one, else False.
    """
    print("Test Case 3: Book Borrowing Logic")
    book = books_test['books'][0]
    print(f"Initial Stock: {book['total_quantity'] - book['loaned_quantity']}")
    original_loaned = book['loaned_quantity']

    user_input = "I want to borrow test book"
    response = get_response(user_input, intents_test, books_test)
    print(f"Bot Response: {response}")

    # Check loaned quantity
    current_loaned = book['loaned_quantity']
    if current_loaned - original_loaned == 1:
        print("Result: PASS (Original loaned quantity increased 1)")
        return True
    else:
        print(f"Result: FAIL (Loaned quantity is {current_loaned})")
        return False

In [89]:
test_case_3()

Test Case 3: Book Borrowing Logic
Initial Stock: 1
Bot Response: Success! You have borrowed 'Test Book'.
Result: PASS (Original loaned quantity increased 1)
