# Chat Bots

NLP - Spring Semester of 2024 at University of Tehran - CA6

In [None]:
%pip install --upgrade --quiet requests\
    beautifulsoup4

%pip install --upgrade --quiet langchain langchain-community langchain-together\
    unstructured[pdf]\
    langchain-huggingface\
    faiss-cpu faiss-gpu\
    rank_bm25\
    tavily-python

%pip install langgraph

In [None]:
import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urljoin
import os.path
from textwrap import dedent

from langchain_community.document_loaders import OnlinePDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings

from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

from langchain_together import ChatTogether
from langchain.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from typing import Literal

from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults
from langchain_core.runnables import chain
from langchain_core.documents.base import Document

from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage

from typing import TypedDict
from langchain_core.documents import Document
from langchain_core.messages.base import BaseMessage

from langgraph.graph import StateGraph, END
from IPython.display import Image, display
from langchain_core.runnables.graph import MermaidDrawMethod

from IPython.core.display import Markdown

## Part 1. Getting the Required Data

In this project we are going to use the PDFs of "Speech and Language Processing" by Dan Jurafsky and James H. Martin to build a chat bot that will answer your NLP questions! For this purpose we'll first download the HTML page of the book and scrape out the links to each chapter. We'll then download and store them all.

In [None]:
# Landing page of the book; the chapter PDF links are scraped from it later.
url = 'https://stanford.edu/~jurafsky/slp3/'
# requests waits indefinitely by default; the timeout keeps this cell from
# hanging forever when the server does not answer.
response = requests.get(url, timeout=30)
# Stays None when the download fails, so later cells can detect the failure.
html_content = None

if response.status_code == 200:
    html_content = response.content
    print("Successfully fetched the web page!")
else:
    print("Failed to fetch the web page!")

In [None]:
# Parse the landing page and keep only anchors whose href is a numbered
# chapter file such as "3.pdf", resolved against the page URL.
soup = BeautifulSoup(html_content, 'html.parser')
links = [
    urljoin(url, anchor['href'])
    for anchor in soup.find_all('a', href=True)
    if re.search(r'^\d+\.pdf$', anchor['href'])
]

print(links)

Now that we have the links, we'll load them in a suitable format with LangChain tools.

In [None]:
# Load every chapter PDF and collect the resulting documents in one flat list.
documents = []
for link in links:
    documents += OnlinePDFLoader(link).load()

Split the text into smaller chunks using the `langchain-text-splitter`.

In [None]:
# Chunks are measured in characters (length_function=len): at most 1024 per
# chunk, with 64 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=64,
    chunk_size=1024,
)

document_chunks = text_splitter.split_documents(documents)

## Part 2. Embedding and Storing Vectors

Now that we have our documents ready, let's vectorize them using Hugging Face embeddings. We'll then use FAISS to efficiently store and query these vectors.

In [None]:
# Embedding model (library default checkpoint) and the on-disk byte store
# that caches the vectors it produces.
underlying_embedding = HuggingFaceEmbeddings()
store = LocalFileStore('./cache/')

# Namespacing the cache by model name keeps vectors from different embedding
# models apart, so switching models cannot silently reuse stale vectors.
embeddings = CacheBackedEmbeddings.from_bytes_store(
    underlying_embedding,
    store,
    namespace=underlying_embedding.model_name,
)

In [None]:
faiss_vector_store = None

# Folder the FAISS index is persisted to between runs.
embeddings_db_file = "faiss_index"

# `save_local` writes a *directory*, so the cache check must use isdir;
# with isfile the check never succeeds and the index is rebuilt every run.
if not os.path.isdir(embeddings_db_file):
    faiss_vector_store = FAISS.from_documents(document_chunks, embeddings)
    faiss_vector_store.save_local(embeddings_db_file)
else:
    # load_local needs the embedding function to embed future queries.
    # The stored docstore is pickled, so loading must be opted into
    # explicitly; acceptable here only because this notebook wrote the index.
    faiss_vector_store = FAISS.load_local(
        embeddings_db_file,
        embeddings,
        allow_dangerous_deserialization=True,
    )

# Number of vectors held by the index.
print(faiss_vector_store.index.ntotal)

## Part 3. Implementing Retrievers

In this part we'll use the `EnsembleRetriever` to combine a semantic retriever with a lexical one. We'll use `FAISS` and `BM25Retriever` for this purpose.

In [None]:
# Lexical retriever built from the raw chunk texts; returns the top 4 matches.
bm25_retriever = BM25Retriever.from_texts(
    [chunk.page_content for chunk in document_chunks],
    k=4,
)

In [None]:
# Semantic retriever over the FAISS index; returns the 2 nearest chunks.
faiss_retriever = faiss_vector_store.as_retriever(search_kwargs=dict(k=2))

Test both retrievers before taking the next step.

In [None]:
# Three probe queries, from on-topic for the NLP book to completely off-topic.
related_query = "Recurrent Neural Networks"
unrelated_query = "Red-black trees"
super_unrelated_query = "President of Congo"

# Trailing expression: the notebook displays the FAISS retriever's results.
faiss_retriever.invoke(related_query)

In [None]:
# Same on-topic query against the lexical BM25 retriever.
bm25_retriever.invoke(related_query)

Now let's combine these two into an ensemble!

In [None]:
# Combine the lexical and semantic retrievers with equal weight.
# The keyword is `weights`; the previous spelling `wights` was not a valid
# parameter, so the intended weighting was never actually passed.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)

# Probe the ensemble with on-topic, off-topic and unrelated queries.
print(ensemble_retriever.invoke(related_query))
print(ensemble_retriever.invoke(unrelated_query))
print(ensemble_retriever.invoke(super_unrelated_query))

## Part 4. Router Chain

For the first part of this chat bot, we need to make a chain that decides which class the user question belongs to. We'll start by making a chat template to guide the LLM through our routing task. It will ask the model to classify the given question as one of three kinds: `NLP`, `Computer Science`, `Other` (named `VectorStore`, `SearchEngine`, and `None` in the prompt).

We will then pass the prompt to the Llama model, using the TogetherAI API. In the end we'll use the Pydantic parser to parse the Llama result.

In [None]:
# Routing instructions for the LLM. `{output_instruction}` receives the
# parser's format instructions and `{query}` the user question.
router_prompt_template = dedent(
    """
    You must route user queries to one of three classes: VectorStore, SearchEngine, or None.
    If the user query is about Natural Language Processing and Speech Processing, choose VectorStore.
    If the query is something about computer science but it's not related to NLP, SearchEngine.
    If it's nothing about NLP or Computer Science, choose None.
    Output only the chosen class. Do not output anything more than that.
    {output_instruction}
    query: {query}
    """
)

router_prompt = ChatPromptTemplate.from_template(template=router_prompt_template)

In [None]:
# The Together AI key is read from the environment instead of being committed
# with the notebook. NOTE: the key that used to be hard-coded here has been
# exposed in the source and should be revoked.
together_ai_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_ai_api_key:
    raise RuntimeError(
        "Set the TOGETHER_API_KEY environment variable before running this cell."
    )

# temperature=0 keeps the routing decision as deterministic as possible.
router_llm = ChatTogether(
    together_api_key=together_ai_api_key,
    model="meta-llama/Llama-3-70b-chat-hf",
    temperature=0
)

In [None]:
# Schema of the router's answer; the output parser below validates the LLM
# reply against it.
class QueryKind(BaseModel):
    # Routing target picked by the LLM. "None" is a string label here, not
    # Python's None.
    class_name: Literal["VectorStore", "SearchEngine", "None"] = Field()

# Parser that turns the LLM's reply into a QueryKind instance.
router_parser = PydanticOutputParser(pydantic_object=QueryKind)

In [None]:
# Prompt -> LLM -> parser: yields a QueryKind for a given query.
router_chain = router_prompt | router_llm | router_parser

# One sample question per routing class. Every call also passes the parser's
# format instructions, which fill the {output_instruction} prompt slot.
nlp_test_result = router_chain.invoke({
    "query": "How should I implement an LSTM model?",
    "output_instruction": router_parser.get_format_instructions()
})

cs_test_result = router_chain.invoke({
    "query": "What's a redd-black tree?",
    "output_instruction": router_parser.get_format_instructions()
})

other_test_result = router_chain.invoke({
    "query": "Who's the president of Congo?",
    "output_instruction": router_parser.get_format_instructions()
})

print(f"NLP: {nlp_test_result}, CS: {cs_test_result}, Other: {other_test_result}")

## Part 5. Search Engine Chain

We've implemented the vector store function, and the router chain. Now in order to answer user queries about computer science, outside NLP, we'll need to use a search engine to retrieve documents. We'll then use one of the two methods, local vector store or a remote search engine to provide our language model some context to answer the user query.
We'll be using Tavily AI for our search engine.

In [None]:
# The Tavily key is read from the environment instead of being committed with
# the notebook. NOTE: the key that used to be hard-coded here has been exposed
# in the source and should be revoked.
tavily_api_key = os.environ.get("TAVILY_API_KEY")
if not tavily_api_key:
    raise RuntimeError(
        "Set the TAVILY_API_KEY environment variable before running this cell."
    )

# Search tool limited to the five best results per query.
tavily_search_wrapper = TavilySearchAPIWrapper(tavily_api_key=tavily_api_key)
tavily_search = TavilySearchResults(api_wrapper=tavily_search_wrapper, max_results=5)

# Smoke test; the notebook displays the raw search results.
tavily_search.invoke("What's NLP?")

In [None]:
@chain
def parse_search_engine(documents: list[dict[str, str]]) -> list[Document]:
    """Convert raw search-engine results into LangChain ``Document`` objects.

    Args:
        documents: Search results, each a mapping with a ``'content'`` and a
            ``'url'`` entry.

    Returns:
        One ``Document`` per result, with the content as ``page_content`` and
        the source address stored under ``metadata['url']``. An empty list is
        returned when the input is not a list of results.
    """
    # NOTE(review): the Tavily tool appears to hand back an error message
    # string instead of a result list when the search fails - confirm against
    # the tool's documentation. Indexing such a value with 'content' would
    # raise TypeError, so it is treated as "no results".
    if not isinstance(documents, list):
        return []

    return [
        Document(page_content=doc['content'], metadata={'url': doc['url']})
        for doc in documents
    ]

In [None]:
# Pipe the raw Tavily results through the converter so the chain yields
# Document objects.
search_engine_chain = tavily_search | parse_search_engine

# Smoke test; the notebook displays the converted documents.
search_engine_chain.invoke("What's NLP?")

## Part 6. Relevancy Check Chain

We need another chain that prompts an LLM and asks it whether the retrieved documents are relevant to the question or not.

In [None]:
# Prompt asking the LLM to judge one retrieved document against the user
# question. Fixes the typo "q user question" in the instruction text.
relevancy_check_template = \
    """
    You are provided with a user question and a document. If the given document is relevant to the user question and can be used to answer it, output 'Relevant', and if not, output 'Irrelevant'. Only output the words Relevant and Irrelevant in a JSON format as described in the output instructions.
    User question: {user_query}
    Document: {retrieved_document}
    Output instruction: {output_instruction}
    """
# Strip the common leading indentation introduced by the source layout.
relevancy_check_template = dedent(relevancy_check_template)

relevancy_check_prompt = ChatPromptTemplate.from_template(
    template=relevancy_check_template
)

In [None]:
# Llama-3 70B judge for the relevancy check, run at temperature 0.
relevancy_check_llm = ChatTogether(
    model="meta-llama/Llama-3-70b-chat-hf",
    temperature=0,
    together_api_key=together_ai_api_key,
)

In [None]:
# Schema of the relevancy verdict; the output parser below validates the LLM
# reply against it.
class RelevancyKind(BaseModel):
    # Verdict for a single (question, document) pair.
    class_name: Literal["Relevant", "Irrelevant"] = Field()

# Parser that turns the LLM's reply into a RelevancyKind instance.
relevancy_check_parser = PydanticOutputParser(pydantic_object=RelevancyKind)

In [None]:
# Prompt -> LLM -> parser: yields a RelevancyKind verdict.
relevancy_check = relevancy_check_prompt | relevancy_check_llm | relevancy_check_parser

# Try the check on the first search-engine hit for an on-topic question.
user_query = "How to implement recurrent neural network?"
retrieved_document = search_engine_chain.invoke(user_query)[0]
result = relevancy_check.invoke({
    "user_query": user_query,
    "retrieved_document": retrieved_document,
    "output_instruction": relevancy_check_parser.get_format_instructions()
})

print(f'Retrieved Document: {retrieved_document}')
print(f'Relevancy Check: {result}')

Let's also trick the model and see if it succeeds or not!

In [None]:
# Pair an unrelated question with the previously retrieved document; the
# notebook displays the verdict returned by the chain.
trick_user_query = "Who was the first person to land on the moon?"
relevancy_check.invoke({
    "user_query": trick_user_query,
    "retrieved_document": retrieved_document,
    "output_instruction": relevancy_check_parser.get_format_instructions()
})

## Part 7. Fallback Chain

In [None]:
# Prompt for the no-retrieval path: a teaching-assistant persona, followed by
# the conversation so far and the new user message.
fallback_template = dedent(
    """
    You are a friendly and kind teaching assistant. Your job is to provide educational material related to NLP and Speech Recognition to the human user. Do not respond to the queries that are outside the context of NLP and Speech Recognition. If a query is not related acknowledge your limitations.

    Current conversation:

    {chat_history}


    Human: {query}
    """
)
fallback_prompt = ChatPromptTemplate.from_template(fallback_template)

In [None]:
def gather_chat_history(context) -> list[str]:
    """Render the stored conversation as speaker-prefixed lines.

    Each message in ``context['chat_history']`` becomes ``'Human: <content>'``
    when it is a ``HumanMessage`` and ``'AI: <content>'`` otherwise; the
    original message order is kept.
    """
    return [
        f"{'Human' if isinstance(message, HumanMessage) else 'AI'}: {message.content}"
        for message in context['chat_history']
    ]

@chain
def gather_info(context) -> dict:
    """Build the input variables for the fallback prompt.

    Args:
        context: Mapping holding the conversation under ``'chat_history'`` and
            the current user message under ``'query'``.

    Returns:
        A dict with ``'chat_history'`` (one speaker-prefixed line per message,
        joined with newlines) and ``'query'``.
    """
    # Join the lines into plain text: interpolating the list itself would put
    # a Python list repr (brackets, quotes, escaped newlines) into the prompt.
    return {
        "chat_history": "\n".join(gather_chat_history(context)),
        "query": context['query']
    }

In [None]:
# Llama-3 70B for free-form answers, sampled at temperature 1.
fallback_llm = ChatTogether(
    model="meta-llama/Llama-3-70b-chat-hf",
    temperature=1,
    together_api_key=together_ai_api_key,
)

In [None]:
# State -> prompt variables -> prompt -> LLM -> plain string answer.
fallback_chain = gather_info | fallback_prompt | fallback_llm | StrOutputParser()

## Part 8. Generate with Context Chain

This chain will get the user query along with some retrieved documents, and try to answer the user with the help of an LLM.

In [None]:
# Prompt for answering strictly from retrieved context: `{documents}` holds
# the retrieved documents and `{query}` the user question.
generate_template = dedent(
    """
    You are a helpful assistant. Answer the query below based only on the provided context. If the given context is not relevant, DO NOT answer based on your own knowledge.

    Context: {documents}

    Query: {query}
    """
)
generate_prompt = ChatPromptTemplate.from_template(generate_template)

In [None]:
# Reuses the fallback LLM instance (same model and temperature) for
# context-grounded generation.
generate_llm = fallback_llm

# Prompt -> LLM -> plain string answer.
generate_with_context_chain = generate_prompt | generate_llm | StrOutputParser()

## Part 9. Implementing the Workflow

First we need a customized dictionary to save the chat history and other materials related to the conversation context.

In [None]:
class BotState(TypedDict):
    """This is a class to save the current chat context and history with the bot."""

    # Current user question; read by the router and by every node.
    query: str
    # Earlier turns of the conversation; read by the fallback chain.
    chat_history: list[BaseMessage]
    # Final answer text, written by the fallback / generate-with-context nodes.
    generation: str
    # Retrieved documents, written by the retrieval nodes and narrowed by the
    # relevancy filter.
    documents: list[Document]


Then we implement the nodes using the previously developed chains.

In [None]:
def router_node(state: dict) -> str:
    """Pick the first node of the workflow for the current query.

    Args:
        state: Graph state; only ``state['query']`` is read.

    Returns:
        ``'VectorStore'`` or ``'SearchEngine'`` as classified by the router
        chain, or ``'LLMFallback'`` when the chain answers ``'None'`` or the
        classification fails for any reason.
    """
    try:
        result = router_chain.invoke({
            'query': state['query'],
            'output_instruction': router_parser.get_format_instructions()
        })
        class_name = result.class_name
    except Exception:
        # Best effort: API errors and unparsable LLM output both fall back to
        # the plain LLM. Unlike a bare `except:`, this lets KeyboardInterrupt
        # and SystemExit propagate.
        return 'LLMFallback'

    return 'LLMFallback' if class_name == 'None' else class_name

In [None]:
def vector_store_node(state: dict) -> dict:
    """Fetch documents for ``state['query']`` from the ensemble retriever.

    Returns a state update holding the retrieved documents under ``'documents'``.
    """
    retrieved = ensemble_retriever.invoke(input=state['query'])
    return {"documents": retrieved}

In [None]:
def search_engine_node(state: dict) -> dict:
    """Fetch documents for ``state['query']`` through the search-engine chain.

    Returns a state update holding the search results under ``'documents'``.
    """
    search_hits = search_engine_chain.invoke(state['query'])
    return {"documents": search_hits}

In [None]:
def filter_docs_node(state: dict) -> dict:
    """Keep only the retrieved documents the LLM judges relevant to the query.

    Args:
        state: Graph state; reads ``state['documents']`` and ``state['query']``.

    Returns:
        A state update whose ``'documents'`` list holds, in the original
        order, the documents classified as ``'Relevant'``. A document whose
        check fails is dropped.
    """
    filtered_documents = []
    for doc in state['documents']:
        try:
            verdict = relevancy_check.invoke({
                'user_query': state['query'],
                'retrieved_document': doc,
                'output_instruction': relevancy_check_parser.get_format_instructions()
            }).class_name
        except Exception:
            # Best effort: a failed API call or unparsable verdict only drops
            # this document. Unlike a bare `except:`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            continue

        if verdict == 'Relevant':
            filtered_documents.append(doc)

    return {'documents': filtered_documents}

In [None]:
def fallback_node(state: dict) -> dict:
    """Answer without retrieval by running the fallback chain on the state.

    Returns a state update holding the answer under ``'generation'``.
    """
    answer = fallback_chain.invoke(state)
    return {'generation': answer}

In [None]:
def generate_with_context_node(state: dict) -> dict:
    """Answer the query by running the context-grounded chain on the state.

    Returns a state update holding the answer under ``'generation'``.
    """
    answer = generate_with_context_chain.invoke(state)
    return {'generation': answer}

Now that all the nodes are ready and implemented, we can proceed to create the agent graph.

In [None]:
workflow = StateGraph(BotState)

# Register the processing nodes.
workflow.add_node('vector_store', vector_store_node)
workflow.add_node('search_engine', search_engine_node)
workflow.add_node('fallback', fallback_node)
workflow.add_node('generate_with_context', generate_with_context_node)
workflow.add_node('filter_docs', filter_docs_node)

# The router picks the first node based on the kind of query.
workflow.set_conditional_entry_point(
    router_node,
    {
        'VectorStore': 'vector_store',
        'SearchEngine': 'search_engine',
        'LLMFallback': 'fallback'
    }
)
# Both retrieval paths pass through the relevancy filter.
workflow.add_edge('vector_store', 'filter_docs')
workflow.add_edge('search_engine', 'filter_docs')
workflow.add_conditional_edges(
    'filter_docs',
    # The routing callable receives the whole graph state, not the document
    # list. The old `len(docs)` therefore counted state keys, was never 0,
    # and the search-engine retry could never be taken.
    # NOTE(review): if the search-engine results are also all filtered out,
    # this edge sends the flow back to 'search_engine' again; the loop is
    # then bounded only by the graph's recursion limit.
    lambda state: 'generate_with_context' if state['documents'] else 'search_engine',
    {
        'search_engine': 'search_engine',
        'generate_with_context': 'generate_with_context'
    }
)
# Both answering nodes end the run.
workflow.add_edge('fallback', END)
workflow.add_edge('generate_with_context', END)

In [None]:
# Compile the graph into a runnable app.
app = workflow.compile(debug=False)
# Render the graph as a PNG and show it inline.
# NOTE(review): the API draw method presumably renders through a remote
# Mermaid service and so needs network access - confirm.
display(
    Image(
        app.get_graph().draw_mermaid_png(
            draw_method=MermaidDrawMethod.API,
        )
    )
)

Let's test it out!

In [None]:
# First turn: start with an empty history.
response1 = app.invoke({'query': 'Hello! I wanna know about NLP.', 'chat_history': []})
# Record this exchange as the history for the next turn.
response1['chat_history'] = [HumanMessage(response1['query']), AIMessage(response1['generation'])]

Markdown(response1['generation'])

In [None]:
# Second turn: reuse the first turn's state with a new query.
# `state` is the same dict object as `response1`, so changing the query also
# changes `response1['query']`.
state = response1
state['query'] = 'Teach me about RNN!'
response2 = app.invoke(state)

Markdown(response2['generation'])