# Part 1: Data loading

In [None]:
!pip install langchain langchain-community pypdf sentence-transformers tiktoken rank_bm25 langchain-together tavily-python langgraph gradio -q

In [None]:
!pip install faiss-gpu -q
#!pip install faiss-cpu -q

In [None]:
from bs4 import BeautifulSoup
import requests
import urllib.parse

# Index page that links to the chapter PDFs; the loop below keeps links whose
# href is "<number>.pdf" for the chapter numbers listed in selected_chapters.
page_url = 'https://stanford.edu/~jurafsky/slp3/'

# Fail fast on network problems instead of hanging or parsing an error page.
page_response = requests.get(page_url, timeout=30)
page_response.raise_for_status()
page_dom = BeautifulSoup(page_response.content, 'html.parser')

chapters = []
selected_chapters = ['13', '14', '15', '16']

# href=True skips anchors that carry no href attribute, which would otherwise
# raise KeyError on pdf_anchor['href'].
for pdf_anchor in page_dom.find_all('a', href=True):
  chapter_id = pdf_anchor['href'].split('.pdf')[0]
  if not chapter_id.isnumeric() or chapter_id not in selected_chapters:
    continue

  # Links on the page may be relative; resolve them against the index URL.
  pdf_url = urllib.parse.urljoin(page_url, pdf_anchor['href'])
  chapters.append({'title': pdf_anchor.text, 'url': pdf_url})

print(chapters)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders.merge import MergedDataLoader

# One PDF loader per selected chapter URL, combined into a single merged loader
# so that all chapters are loaded with one call.
loaders = [PyPDFLoader(chapter['url']) for chapter in chapters]
loader_all = MergedDataLoader(loaders=loaders)
docs = loader_all.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks (chunk_size=1024,
# chunk_overlap=64); `chunks` feeds both the FAISS index and the BM25 retriever.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
chunks = text_splitter.split_documents(docs)

In [None]:
# Mount Google Drive (Colab only) so the embedding cache directory under
# /content/drive/ used in Part 2 is reachable.
from google.colab import drive
drive.mount('/content/drive/')

# Part 2: Embedding and store

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings

# Embedding model used to vectorize the chunks (HuggingFaceEmbeddings with its
# default model, progress bar on, multi-process encoding on).
embedding_function = HuggingFaceEmbeddings(show_progress=True, multi_process=True)

# File-backed byte store on the mounted Drive, used as the cache backend below.
store = LocalFileStore("/content/drive/MyDrive/CachedEmbeddings/")

# Wraps embedding_function so computed embeddings are persisted in `store`.
# NOTE(review): the active FAISS.from_documents call in the next cell passes
# embedding_function, not cached_embedder, so this cache is currently unused.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(embedding_function, store)

In [None]:
# Build the FAISS index from the chunks with the uncached embedder.
# Cached alternative: FAISS.from_documents(documents=chunks, embedding=cached_embedder)
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding_function)

# Part 3: Retriever

In [None]:
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Keyword (BM25) retriever over the same chunks as the vector index.
# `search_kwargs` is a vector-store-retriever option, not a BM25Retriever one;
# BM25Retriever's result count is its `k` attribute, so set that explicitly to
# get 3 results per query, matching the FAISS retriever below.
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 3

# Dense retriever backed by the FAISS index, also limited to 3 results.
faiss_retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Hybrid retriever: combines both result lists, weighting BM25 at 0.3 and
# FAISS at 0.7.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.3, 0.7]
)

In [None]:
sample_questions = ["What is a lexical gap?", "Why are Binary Trees important?", "Who is the president of bolivia?"]

In [None]:
# Smoke test: print what the hybrid retriever returns for each sample question.
for question in sample_questions:
  print(f'Question: {question}')
  print(ensemble_retriever.invoke(question))

# Part 4: Router chain

In [None]:
from google.colab import userdata

In [None]:
from langchain_together import ChatTogether

# Chat model shared by every chain in this notebook. The API key is read from
# Colab user secrets; temperature=0 is used for all routing, grading and
# generation calls.
llm = ChatTogether(
  together_api_key=userdata.get('together_api_key'),
  model="meta-llama/Llama-3-70b-chat-hf",
  temperature=0
)

In [None]:
from langchain.prompts import ChatPromptTemplate

# Routing prompt. Template variables:
#   {output_instructions} - format instructions from the router's output parser
#   {query}               - the user question to route
# The model is asked to pick one of `VectorStore`, `SearchEngine` or `None`.
# NOTE(review): the text says to return only the tool name, while
# {output_instructions} is filled from a PydanticOutputParser; confirm the two
# instructions do not conflict for the chosen model.
router_prompt_template = (
    "You are an expert in routing user queries to either a `VectorStore` or a `SearchEngine` or `None` depending on the conditions below: .\n"
    "If the query is *NOT RELATED* to Computer Science or NLP, choose `None`.\n"
    "If the query is *RELATED* to Natural Language Processing Applications (Machine Translation, Question Answering and Information Retrieval, Chatbots and Dialogue Systems, Automatic Speech Recognition and Text-to-Speech), choose `VectorStore`.\n"
    "If the query is *RELATED* to Computer Science (Algorithms and Data Structures, Computer Networks, Database Systems, Operating Systems, Artificial Intelligence and Machine Learning, Computer Security, Software Engineering, etc), choose `SearchEngine`.\n"
    "Again, If the query is *NOT RELATED* to Computer Science or NLP, choose `None`.\n"
    "Return only and only the name of the tool you chose and *nothing more*.\n"
    "{output_instructions}\n"
    "Query: {query}\n"
)

router_prompt = ChatPromptTemplate.from_template(
    template=router_prompt_template,
)

In [None]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

from typing import Literal

# Structured router output: exactly one of the three tool names from the
# routing prompt. (Documented with comments rather than a docstring so the
# model's schema, which feeds the format instructions, is left unchanged.)
class ChosenTool(BaseModel):
    tool_name: Literal["None", "VectorStore", "SearchEngine"] = Field(description="Chosen tool by LLM in question routing.")

# Parser that validates the LLM reply against ChosenTool; the last expression
# displays the format instructions that get injected into the router prompt.
question_router_parser = PydanticOutputParser(pydantic_object=ChosenTool)
question_router_parser.get_format_instructions()

In [None]:
# Router chain: fill the prompt, call the LLM, parse the reply into ChosenTool.
chain_router = router_prompt | llm | question_router_parser

In [None]:
# Smoke test: print the routing decision for each sample question.
for question in sample_questions:
  print(f'Question: {question}')
  print(chain_router.invoke({"query": question, "output_instructions": question_router_parser.get_format_instructions()}))

# Part 5: Search Engine chain

In [None]:
import os
from langchain.utilities.tavily_search import TavilySearchAPIWrapper
from langchain.tools.tavily_search import TavilySearchResults

# Tavily web-search tool; the API key is read from Colab user secrets.
# Configured for up to 5 results per query with search_depth="advanced".
search = TavilySearchAPIWrapper(tavily_api_key=userdata.get("TAVILY_API_KEY"))
tavily_tool = TavilySearchResults(api_wrapper=search, max_results=5, search_depth="advanced")

In [None]:
from langchain_core.runnables import RunnableLambda
from langchain_core.documents import Document

def search_post_process_func(results):
    """Convert raw web-search results into LangChain ``Document`` objects.

    Args:
        results: Output of the search tool. Expected to be an iterable of
            dicts, each providing a ``'content'`` and a ``'url'`` key.

    Returns:
        A list with one ``Document`` per result, whose ``page_content`` is the
        result's ``'content'`` and whose metadata stores the ``'url'`` under
        ``"source"``.

    Raises:
        RuntimeError: If ``results`` is a string rather than a list of dicts.
    """
    # NOTE(review): TavilySearchResults appears to return the error's repr as a
    # plain string when the API call fails - confirm against the installed
    # version. Iterating over that string would fail with an opaque TypeError,
    # so surface the underlying message instead.
    if isinstance(results, str):
        raise RuntimeError(f"Tavily search failed: {results}")

    return [
        Document(
            page_content=result['content'],
            metadata={"source": result['url']},
        )
        for result in results
    ]

# Wrap the converter as a runnable so it can be piped after the search tool.
search_post_process = RunnableLambda(search_post_process_func)

In [None]:
# Web-search chain: run the Tavily tool, then convert its results to Documents.
chain_engine_search = tavily_tool | search_post_process

In [None]:
sample_documents = []

In [None]:
# Smoke test: run the web-search chain for each sample question and keep the
# first returned document for the relevancy-check demo in Part 6.
# Note: results[0] assumes every search returns at least one document, and
# re-running this cell appends to sample_documents again.
for question in sample_questions:
  print(f'Question: {question}')
  results = chain_engine_search.invoke(question)
  print(results)

  sample_documents.append(results[0])

# Part 6: Relevancy Check

In [None]:
# Relevancy-grading prompt. Template variables:
#   {output_instructions} - format instructions from the relevancy parser
#   {document}            - the candidate document being graded
#   {query}               - the user question
# The model is asked to answer `Relevant` or `Irrelevant`.
relevancy_check_template = (
    "You are an expert in determining whether a document is relevant to a user query or not.\n"
    "If the document is related to the query return only and only `Relevant`.\n"
    "If the document is NOT related to the query return only and only `Irrelevant`.\n"
    "A relevant document will help the user to understand their questions better or answer them.\n"
    "A relevant document will guide a helpful assistant in answering a user's query and will it discuss the specific topic of the user query.\n"
    "{output_instructions}\n"
    "Document: ```{document}```\n"
    "Query: ```{query}```\n"
)

relevancy_check_prompt = ChatPromptTemplate.from_template(
    template=relevancy_check_template,
)

In [None]:
# Structured grader output: `Relevant` or `Irrelevant`. (Documented with
# comments rather than a docstring so the model's schema, which feeds the
# format instructions, is left unchanged.)
class Relevance(BaseModel):
    relevance: Literal["Relevant", "Irrelevant"] = Field(description="Relevancy determined by the LLM.")

# Parser that validates the LLM reply against Relevance; the last expression
# displays the format instructions injected into the relevancy prompt.
relevancy_check_parser = PydanticOutputParser(pydantic_object=Relevance)
relevancy_check_parser.get_format_instructions()

In [None]:
# Relevancy chain: fill the prompt, call the LLM, parse the reply into Relevance.
chain_check_relevancy = relevancy_check_prompt | llm | relevancy_check_parser

In [None]:
# Smoke test: grade the first web-search result collected for each sample
# question. zip() stops at the shorter of the two lists.
for question, document in zip(sample_questions, sample_documents):
  print(f'Question: {question}')
  print(f'Document: {document}')

  print(chain_check_relevancy.invoke({"query": question, "document": document,"output_instructions": relevancy_check_parser.get_format_instructions()}))

# Part 7: Fallback chain

In [None]:
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter
# Needed by the chat-history formatter below. Without it, the formatter only
# works once a later cell happens to import HumanMessage, and raises NameError
# on any non-empty chat history before that.
from langchain_core.messages import HumanMessage

# Prompt for queries that are routed to neither the vector store nor web
# search. Template variables: {chat_history} (pre-formatted transcript) and
# {query} (the current user message).
fallback_prompt = ChatPromptTemplate.from_template(
    (
        "You are a friendly assistant to assist in related subjects below:.\n"
        "1. Natural Language Processing (Machine Translation, Question Answering, Information Retrieval, Text Classification, Sentiment Analysis, Named Entity Recognition, Part-of-Speech Tagging, Dependency Parsing, Neural Networks, Language Models, etc)\n"
        "2. Computer Science (Algorithms and Data Structures, Computer Networks, Database Systems, Operating Systems, Artificial Intelligence and Machine Learning, Computer Security, Software Engineering, etc)\n"
        "Do not respond to queries that are not related.\n"
        "If a query is not related to NLP or Computer Science, acknowledge your limitations.\n"
        "Provide concise responses to only related queries.\n\n"
        "Current conversations:\n\n{chat_history}\n\n"
        "human: {query}"
    )
)


def _format_chat_history(state):
    """Render ``state["chat_history"]`` as one "User: ..." / "Assistant: ..." line per message.

    Messages that are ``HumanMessage`` instances are labelled "User"; every
    other message is labelled "Assistant".
    """
    return "\n".join(
        (
            f"User: {msg.content}"
            if isinstance(msg, HumanMessage)
            else f"Assistant: {msg.content}"
        )
        for msg in state["chat_history"]
    )


# Fallback chain: expects {"query": ..., "chat_history": [...]}, formats the
# history into text, fills the prompt, calls the LLM and returns a plain string.
fallback_chain = (
    {
        "chat_history": _format_chat_history,
        "query": itemgetter("query"),
    }
    | fallback_prompt
    | llm
    | StrOutputParser()
)

# Part 8: Generate with context

In [None]:
# Grounded-generation prompt. Template variables: {context} (the retrieved
# documents) and {query} (the user question). The model is told to answer
# only from the provided context.
generate_with_context_template = (
    "You are a helpful assistant. Answer the query below based only on the provided context. If the given context is not relevant, DO NOT answer based on your own knowledge\n\n"
    "context: {context}\n\n"
    "query: {query}"
)

# Generation chain: fill the prompt, call the LLM, return the reply as a string.
generate_with_context_prompt = ChatPromptTemplate.from_template(generate_with_context_template)
generate_with_context_chain = generate_with_context_prompt | llm | StrOutputParser()

In [None]:
# Smoke test: retrieve context for one question and generate a grounded answer.
# The retrieved documents are passed to the prompt as-is.
query = "What is a lexical gap?"
context = ensemble_retriever.invoke(query)
response = generate_with_context_chain.invoke({"query": query, "context": context})

print(response)

# Part 9: LangGraph

In [None]:
from typing import TypedDict, Annotated
from langchain_core.documents import Document
from langchain_core.messages.base import BaseMessage
import operator

class AgentSate(TypedDict):
    """Shared state passed between the nodes of the LangGraph workflow.

    The class name is kept as spelled because ``StateGraph(AgentSate)`` refers
    to it.
    """

    # The user's question; read by every node.
    query: str
    # Prior conversation messages; read by fallback_node.
    chat_history: list[BaseMessage]
    # Final answer text; written by fallback_node and generate_with_context_node.
    generation: str
    # Candidate context documents; written by the retrieve, search and filter nodes.
    documents: list[Document]

In [None]:
def router_node(state: dict):
    """Pick the graph branch for the current query.

    Runs the router chain on ``state["query"]`` and maps the chosen tool name
    (compared case-insensitively) to a branch label.

    Returns:
        ``"vector_store"``, ``"search_engine"`` or ``"fallback"``; any tool
        name other than VectorStore / SearchEngine yields ``"fallback"``.
    """
    decision = chain_router.invoke({
        "query": state["query"],
        "output_instructions": question_router_parser.get_format_instructions(),
    })

    # Tool name -> branch label; unknown names take the fallback branch.
    branch_by_tool = {
        "vectorstore": "vector_store",
        "searchengine": "search_engine",
    }
    branch = branch_by_tool.get(decision.tool_name.lower(), "fallback")

    print(f"Tool: {branch}")
    return branch

def retrieve_node(state: dict):
    """Fetch documents for ``state["query"]`` from the ensemble retriever.

    Returns:
        A state update of the form ``{"documents": [...]}``.
    """
    retrieved = ensemble_retriever.invoke(state["query"])
    print(f'Number of retrieved documents: {len(retrieved)}')
    return {"documents": retrieved}

def search_engine_node(state: dict):
    """Fetch documents for ``state["query"]`` through the web-search chain.

    Returns:
        A state update of the form ``{"documents": [...]}``.
    """
    found = chain_engine_search.invoke(state["query"])
    print(f'Number of searched documents: {len(found)}')
    return {"documents": found}

def fallback_node(state: dict):
    """Answer the query with the fallback chain, using the chat history only.

    Returns:
        A state update of the form ``{"generation": <answer>}``.
    """
    answer = fallback_chain.invoke(
        {"query": state["query"], "chat_history": state["chat_history"]}
    )
    return {"generation": answer}


def generate_with_context_node(state: dict):
    """Generate an answer to ``state["query"]`` from ``state["documents"]``.

    The documents are handed to the generation chain unchanged as its
    ``context`` input.

    Returns:
        A state update of the form ``{"generation": <answer>}``.
    """
    question, context_docs = state["query"], state["documents"]

    print(f'Query: {question}')
    print(f'Documents: {context_docs}')

    answer = generate_with_context_chain.invoke(
        {"query": question, "context": context_docs}
    )
    return {"generation": answer}

def filter_documents_node(state: dict):
  """Keep only the documents the relevancy chain grades as relevant.

  Each document in ``state["documents"]`` is graded against ``state["query"]``
  with one LLM call; the verdict is compared case-insensitively.

  Returns:
      A state update of the form ``{"documents": [...]}`` containing the
      relevant documents in their original order (possibly empty).
  """
  query = state["query"]
  documents = state["documents"]

  # The format instructions do not depend on the document, so build them once
  # instead of once per graded document.
  output_instructions = relevancy_check_parser.get_format_instructions()

  filtered_documents = []

  for document in documents:
    response = chain_check_relevancy.invoke({
        "query": query,
        "document": document,
        "output_instructions": output_instructions,
    })

    if response.relevance.lower() == 'relevant':
      print(f'Relavent document: {document}')
      filtered_documents.append(document)
    else:
      print(f'Irrelavent document: {document}')

  return {"documents": filtered_documents}

def continue_with_generation(state: dict):
  """Choose the next step after document filtering.

  Returns:
      ``'generate_with_context'`` when at least one document survived the
      filter, otherwise ``'search_engine'``.

  NOTE(review): the graph wires search_engine -> filter_documents -> this
  function, so when every web result is also filtered out this returns
  ``'search_engine'`` again; nothing here bounds the number of retries.
  """
  relevant_count = len(state["documents"])
  print(f'Number of relavent documents: {relevant_count}')

  return 'generate_with_context' if relevant_count > 0 else 'search_engine'

In [None]:
from langgraph.graph import StateGraph, END

# Graph over the shared AgentSate state.
workflow = StateGraph(AgentSate)

# Register the processing nodes under the labels used by the edges below.
workflow.add_node("vector_store", retrieve_node)
workflow.add_node("search_engine", search_engine_node)
workflow.add_node("fallback", fallback_node)
workflow.add_node("generate_with_context", generate_with_context_node)
workflow.add_node("filter_documents", filter_documents_node)

# Entry point: router_node returns a branch label, mapped here to the node
# that should run first.
workflow.set_conditional_entry_point(
    router_node,
    {
        "fallback": "fallback",
        "vector_store": "vector_store",
        "search_engine": "search_engine"
    },
)

# Both retrieval paths pass their documents through the relevancy filter.
workflow.add_edge("vector_store", "filter_documents")
workflow.add_edge("search_engine", "filter_documents")
# After filtering: generate if any document survived, otherwise go (back) to
# web search. Note that the search_engine branch feeds into filter_documents
# again, so this forms a cycle when nothing is judged relevant.
workflow.add_conditional_edges(
    "filter_documents",
    continue_with_generation,
    {
        "search_engine": "search_engine",
        "generate_with_context": "generate_with_context",
    },
)
# Terminal nodes.
workflow.add_edge("generate_with_context", END)
workflow.add_edge("fallback", END)

In [None]:
# Compile the graph into a runnable app and render its diagram as PNG bytes.
app = workflow.compile(debug=False)
plot = app.get_graph().draw_mermaid_png()

# Save the diagram next to the notebook.
with open("plot.png", "wb") as fp:
    fp.write(plot)

from io import BytesIO
from PIL import Image
from IPython.display import display

# Show the same diagram inline in the notebook.
img = Image.open(BytesIO(plot))
display(img)

In [None]:
# End-to-end run with a question outside Computer Science / NLP and an empty history.
response = app.invoke({"query": "Who is the president of Iran?", "chat_history": []})
print(response["generation"])

In [None]:
# End-to-end run with an NLP-flavoured question and an empty history.
response = app.invoke({"query": "What is RAG (Retrieval-Augmented Generation)?", "chat_history": []})
print(response["generation"])

In [None]:
# End-to-end run with an algorithms question and an empty history.
response = app.invoke({"query": "Explain the knapsack problem", "chat_history": []})
print(response["generation"])

In [None]:
# End-to-end run with a question about a software library and an empty history.
response = app.invoke({"query": "What is the huggingface library intended to do?", "chat_history": []})
print(response["generation"])

In [None]:
import gradio as gr
from uuid import uuid4
from langchain_core.messages import HumanMessage, AIMessage

# Chat histories keyed by session id.
history = {}
# A single id generated once when this cell runs, so every call to chat()
# (and every browser connected to the demo) shares the same history entry.
session_id = str(uuid4())

def chat(query):
    """Answer one user message through the graph and record the exchange.

    The history is looked up under the module-level ``session_id``, so all
    calls share one conversation.

    Args:
        query: The user's message.

    Returns:
        A ``(response, documents)`` pair. ``documents`` is a single string with
        one "<content>\\nsource: <source>" entry per document, separated by
        blank lines; when the graph produced no documents it is whatever falsy
        value the graph state held (``None`` or an empty list).
    """
    # Create the session's history list on first use.
    chat_history = history.setdefault(session_id, [])

    # Run the graph with the current message and the conversation so far.
    result = app.invoke({"query": query, "chat_history": chat_history})
    response = result["generation"]
    documents = result.get("documents")

    # Record this exchange for later turns.
    chat_history.extend([HumanMessage(content=query), AIMessage(content=response)])

    if not documents:
        return response, documents

    rendered_documents = "\n\n".join(
        f"{doc.page_content}\nsource: {doc.metadata['source']}" for doc in documents
    )
    return response, rendered_documents

# Create the Gradio interface
# Gradio UI: one question textbox in, two textboxes out (the answer and the
# documents it was based on), backed by chat().
demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(label="Question"),
    outputs=[gr.Textbox(label="Response"), gr.Textbox(label="Retrieved Documents")],
    title="RAG Chatbot",
    description="Ask a Computer Science or NLP related query and the chatbot will generate a response using Retrieval Augmented Generation.",
)

# share=True requests a public link; debug=True keeps the cell attached to the
# server so errors are printed in the notebook.
demo.launch(share=True, inline=True, debug=True)