In [None]:
# Notebook setup cell: install the third-party packages imported below
# (LangChain and its OpenAI / community / Chroma integration packages,
# pypdf for PDF loading, and Gradio for the web UI).
!pip install langchain
!pip install langchain-openai
!pip install langchain-community
!pip install langchain-chroma
!pip install pypdf
!pip install gradio

In [None]:
# Required Imports
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.runnables import Runnable
from langchain_core.messages import HumanMessage, AIMessage
import gradio as gr
import getpass
import os

# Ask for the key only when it is not already available, so re-running this
# cell (or exporting OPENAI_API_KEY beforehand) does not prompt again or
# overwrite a working key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OpenAI Key")
openai_api_key = os.environ["OPENAI_API_KEY"]

# Initialize Chat History
# Module-level conversation memory: generate_response() appends one
# HumanMessage and one AIMessage here per answered question.
chat_history = []

def create_db_from_pdf(
    pdf_url: str,
    *,
    chunk_size: int = 200,
    chunk_overlap: int = 50,
    embedding_model: str = "text-embedding-ada-002",
    collection_name: str = "nestle_policy",
) -> Chroma:
    """
    Load a PDF, split it into overlapping chunks and index them in Chroma.

    Args:
        pdf_url (str): URL to the PDF document.
        chunk_size (int): Maximum size of each text chunk passed to the
            splitter. Defaults to 200.
        chunk_overlap (int): Overlap between consecutive chunks.
            Defaults to 50.
        embedding_model (str): Name of the OpenAI embedding model.
            Defaults to "text-embedding-ada-002".
        collection_name (str): Name of the Chroma collection to populate.
            Defaults to "nestle_policy".

    Returns:
        Chroma: The Chroma vector database created from the document.

    Raises:
        ValueError: If loading, splitting, embedding or indexing fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        # Load the PDF document
        pages = PyPDFLoader(pdf_url).load()

        # Split the document into manageable chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        chunked_docs = splitter.split_documents(pages)

        # Embed the chunks and store them; the collection is configured for
        # cosine distance.
        embeddings = OpenAIEmbeddings(model=embedding_model)
        return Chroma.from_documents(
            documents=chunked_docs,
            embedding=embeddings,
            collection_name=collection_name,
            collection_metadata={"hnsw:space": "cosine"},
        )
    except Exception as e:
        # Keep the single ValueError contract for callers, but chain the
        # underlying error explicitly so the root cause stays in the traceback.
        raise ValueError(f"Failed to load and process the PDF: {e}") from e

def create_chain(db: Chroma, llm: ChatOpenAI) -> Runnable:
    """
    Assemble the conversational RAG pipeline from a vector store and a chat model.

    The chain is built in two stages: a history-aware retriever that uses the
    rephrasing prompt together with the retriever, and a "stuff documents"
    chain that answers from the retrieved context.

    Args:
        db (Chroma): The Chroma vector database to retrieve from.
        llm (ChatOpenAI): The chat model used for both rephrasing and answering.

    Returns:
        Runnable: The combined retrieval + question-answering chain.
    """
    # System instructions for the two LLM calls (question rewriting, answering)
    standalone_question_instructions = """
    Given a chat history and the latest user question, formulate a standalone question
    which can be understood without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is.
    """
    answer_instructions = """
    You are a helpful assistant that can answer questions about Human Resource Policies
    based on the document provided: The Nestle Human Resources Policy.
    You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    Only use the factual information from the document to answer the question.
    If you don't know the answer, just say "I don't know".
    Keep the answer concise and within 5 lines unless the user asks for more information.
    Context:
    {context}
    """

    def _with_history(system_text):
        # Both prompts share one layout: system text, prior turns, new user input
        return ChatPromptTemplate.from_messages([
            ("system", system_text),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])

    # Retriever configured with a similarity score threshold of 0.6
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.6},
    )

    # Stage 1: history-aware retrieval; stage 2: answer from the retrieved context
    contextual_retriever = create_history_aware_retriever(
        llm,
        retriever,
        _with_history(standalone_question_instructions),
    )
    answer_chain = create_stuff_documents_chain(llm, _with_history(answer_instructions))

    return create_retrieval_chain(contextual_retriever, answer_chain)

def generate_response(chain: Runnable, question: str, history: "list | None" = None) -> str:
    """
    Generate a response from the RAG chain and record the exchange.

    Args:
        chain (Runnable): The RAG chain for generating responses.
        question (str): The user question to answer.
        history (list | None): Message list used as conversational context and
            extended in place with the new question and answer. When omitted,
            the module-level ``chat_history`` is used, which is the original
            behaviour.

    Returns:
        str: The answer generated by the chatbot.
    """
    if history is None:
        history = chat_history

    # Invoke the RAG chain with the input question and chat history
    response = chain.invoke({"input": question, "chat_history": history})
    answer = response["answer"]

    # Record the turn only after a successful call, so a failed invocation
    # leaves the history untouched.
    history.append(HumanMessage(content=question))
    history.append(AIMessage(content=answer))

    return answer

def ui_chatbot(input, history):
    """
    Gradio UI callback function for the chatbot.

    Args:
        input (str): User's input message.
        history (list): Chat shown in the Gradio Chatbot, as
            (user message, bot message) pairs.

    Returns:
        tuple: An empty string (to clear the textbox) and the updated history.
    """
    if history is None:
        history = []

    # Ignore blank submissions instead of sending an empty question to the LLM.
    if not input or not input.strip():
        return "", history

    # Rebuild the module-level LLM context from what this chat window shows.
    # Without this, pressing "Clear" (or opening a new session) empties the
    # UI while the model still sees the previous conversation.
    chat_history.clear()
    for user_text, bot_text in history:
        if user_text is not None:
            chat_history.append(HumanMessage(content=user_text))
        if bot_text is not None:
            chat_history.append(AIMessage(content=bot_text))

    # Generate response and update history
    bot_message = generate_response(qa_chain, input)
    history.append((input, bot_message))
    return "", history

# Index the published policy PDF, then wire the vector store to the chat model
POLICY_PDF_URL = "https://www.nestle.com/sites/default/files/asset-library/documents/jobs/humanresourcespolicy.pdf"

chroma_db = create_db_from_pdf(POLICY_PDF_URL)
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
qa_chain = create_chain(db=chroma_db, llm=llm)

def _reset_chat_history():
    """Empty the module-level conversation memory used as LLM context."""
    chat_history.clear()


# Gradio Interface
with gr.Blocks() as demo:
    # Title and Description
    gr.Markdown(
        """
        # 🏢 Nestlé HR Policy Chatbot
        **Ask questions about Nestlé's HR policies and get responses directly from the document.**

        This chatbot answers questions based on the **Nestlé Human Resources Policy**.
        """
    )
    # Gradio components
    chatbot = gr.Chatbot(height=300)  # Adjust height for better visualization
    msg = gr.Textbox(label="Ask a question about HR Policy")
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # Link Gradio components to functions: both the button and pressing Enter
    # in the textbox send the question.
    btn.click(ui_chatbot, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(ui_chatbot, inputs=[msg, chatbot], outputs=[msg, chatbot])

    # ClearButton only resets the listed widgets; also drop the stored
    # messages so the model does not keep answering with the old context.
    clear.click(_reset_chat_history, inputs=None, outputs=None)

# Launch Gradio demo
gr.close_all()  # Close any previously running Gradio demos
demo.launch(share=True)  # share=True asks Gradio for a public share link
