In [2]:
from dotenv import load_dotenv

# Read a local .env file into the process environment; the `True` shown as the
# cell output is this call's return value.
# NOTE(review): presumably this supplies the API credentials needed by the
# model clients created in later cells — confirm which variables are required.
load_dotenv()

True

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document

# Embedding model used both to index the documents and to embed queries.
embedding_function = HuggingFaceEmbeddings(model='all-MiniLM-L6-v2')

# Tiny hand-written knowledge base about the restaurant. The "source" metadata
# records which (notional) file each fact came from.
docs = [
    Document(
        page_content="Bella Vista is owned by Antonio Rossi, a renowned chef with over 20 years of experience in the culinary industry. He started Bella Vista to bring authentic Italian flavors to the community.",
        metadata={"source": "owner.txt"},
    ),
    Document(
        page_content="Bella Vista offers a range of dishes with prices that cater to various budgets. Appetizers start at $8, main courses range from $15 to $35, and desserts are priced between $6 and $12.",
        metadata={"source": "dishes.txt"},
    ),
    Document(
        page_content="Bella Vista is open from Monday to Sunday. Weekday hours are 11:00 AM to 10:00 PM, while weekend hours are extended from 11:00 AM to 11:00 PM.",
        metadata={"source": "restaurant_info.txt"},
    ),
    Document(
        page_content="Bella Vista offers a variety of menus including a lunch menu, dinner menu, and a special weekend brunch menu. The lunch menu features light Italian fare, the dinner menu offers a more extensive selection of traditional and contemporary dishes, and the brunch menu includes both classic breakfast items and Italian specialties.",
        metadata={"source": "restaurant_info.txt"},
    ),
]

# Pass stable, position-based ids so that re-running this cell in the same
# kernel overwrites the existing entries instead of indexing every document a
# second time (which would let the k=2 retriever return the same text twice).
# Two documents share a "source", so the ids are derived from the list index.
db = Chroma.from_documents(
    docs,
    embedding_function,
    ids=[f"bella-vista-{index}" for index in range(len(docs))],
)

# Retriever that returns the two closest documents for a query.
retriever = db.as_retriever(search_kwargs={"k": 2})

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text for answer generation. It has three placeholders that must be
# supplied when the prompt is invoked: {history}, {context} and {question}.
template = """Answer the question based on the following context and the Chathistory. Especially take the latest question into consideration:

Chathistory: {history}

Context: {context}

Question: {question}
"""
# Wrap the raw text in a chat prompt template so it can be piped into a model.
prompt = ChatPromptTemplate.from_template(template)

In [5]:
from langchain_groq import ChatGroq

# Chat model client; the same model name is also hard-coded inside the
# functions defined in the next cell.
llm = ChatGroq(model='gemma2-9b-it')

# Generation chain: fill in the prompt defined above, then send it to the model.
rag_chain = prompt | llm

In [None]:
from typing import TypedDict, List
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain.schema import Document
from pydantic import BaseModel, Field
from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

class AgentState(TypedDict):
    """Mutable state dictionary shared by the question-handling functions.

    Each function in this cell receives the mapping, updates some of its keys
    in place and returns it.
    """

    # Conversation so far; question_rewriter appends the incoming question.
    messages: List[BaseMessage]
    # Documents set by retrieve and later filtered by retrieval_grader.
    documents: List[Document]
    # Classifier verdict written by question_classifier and read (stripped and
    # lowercased) by on_topic_router.
    on_topic: str
    # Query text used for classification, retrieval and document grading.
    rephrased_question: str
    # True when retrieval_grader kept at least one document.
    proceed_to_generate: bool
    # Reset to 0 by question_rewriter; no other use is visible in this cell.
    rephrase_count: int
    # The user's question for the current turn.
    question: HumanMessage

class GradeQuestion(BaseModel):
    # Structured-output schema requested from the model in question_classifier.
    # NOTE(review): documented with comments rather than a class docstring on
    # purpose — a docstring on a schema class may be forwarded to the model as
    # its description; confirm before adding one.
    #
    # `score` is expected to be 'Yes' or 'No'; readers strip whitespace and
    # lowercase it before comparing.
    score: str = Field(
        description="Question is about the specified topics? If yes -> 'Yes' if not -> 'No'"
    )

def question_rewriter(state: AgentState):
    """Reset per-turn state and turn the latest question into a standalone query.

    The function clears every key derived from a previous turn, records the
    incoming question in the message history and then fills
    ``state["rephrased_question"]``:

    * with the question text itself when the history holds only that question;
    * otherwise with a model-written rewrite based on the earlier conversation.

    Args:
        state: Agent state; must contain ``"question"`` (an object exposing
            ``.content``). ``"messages"`` may be missing or ``None``.

    Returns:
        The same ``state`` mapping, updated in place.
    """
    print(f"Entering question_rewriter with following state: {state}")

    # Start every turn from a clean slate.
    state['documents'] = []
    state["on_topic"] = ""
    state['rephrased_question'] = ""
    state['proceed_to_generate'] = False
    state['rephrase_count'] = 0

    if state.get('messages') is None:
        state['messages'] = []

    question = state['question']
    history = state['messages']

    # Record the question unless it already is the newest entry. A membership
    # test (`question not in history`) relies on ==, so a question that merely
    # equals an earlier one would be skipped; the history would then lack the
    # repeated question and the `[:-1]` slice below would drop the latest reply
    # instead of the current question.
    if not history or history[-1] != question:
        history.append(question)

    if len(history) > 1:
        # Everything before the current question is the context for the rewrite.
        conversation = history[:-1]
        current_question = question.content
        messages = [
            SystemMessage(
                content="You are a helpful assistant that rephrases the user's question to be a standalone question optimized for retrieval."
            )
        ]
        messages.extend(conversation)
        messages.append(HumanMessage(content=current_question))
        # The prompt is built from concrete message objects, so it has no
        # template variables and is invoked with an empty mapping.
        rephrase_prompt = ChatPromptTemplate.from_messages(messages)
        llm = ChatGroq(model='gemma2-9b-it')
        chain = rephrase_prompt | llm
        response = chain.invoke({})
        better_question = response.content.strip()
        if not better_question:
            # An empty rewrite would make later steps work on "", so fall back
            # to the question as asked.
            better_question = current_question
        print(f"question_rewriter: Rephrased question: {better_question}")
        state["rephrased_question"] = better_question
    else:
        # First message of the conversation: nothing to resolve against.
        state["rephrased_question"] = question.content
    return state
        
def question_classifier(state: AgentState):
    """Ask the model whether the rephrased question is about a supported topic.

    Sends a classifier instruction plus ``state["rephrased_question"]`` to the
    chat model with ``GradeQuestion`` as the structured-output schema, and
    stores the whitespace-stripped ``score`` in ``state["on_topic"]``.

    Returns:
        The same ``state`` mapping, updated in place.
    """
    print("Entering question_classifier")

    system_text = """You are a classifier. Your task is to determine if the user's question is about one of the following specific topics concerning the restaurant Bella Vista:

    1. Information about the owner of Bella Vista, Antonio Rossi.
    2. Prices of dishes at Bella Vista.
    3. Opening hours of Bella Vista.

    Based on whether the question falls into one of these categories, you will determine the appropriate value for the 'score' field in the output schema.
    """
    # Both messages are concrete objects, so the prompt needs no input variables.
    grade_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(content=system_text),
            HumanMessage(content=f"User Question: {state['rephrased_question']}"),
        ]
    )
    structured_llm = ChatGroq(model='gemma2-9b-it').with_structured_output(GradeQuestion)
    result = (grade_prompt | structured_llm).invoke({})

    verdict = result.score.strip()
    state["on_topic"] = verdict
    print(f"question_classifier: on_topic = {verdict}")
    return state

def on_topic_router(state: AgentState):
    """Pick the next step from the classifier verdict stored in ``state``.

    Returns:
        ``"retrieve"`` when ``state["on_topic"]`` equals ``"yes"`` after
        stripping whitespace and lowercasing; ``"off_topic_response"`` for any
        other value, including a missing key.
    """
    print("Entering on_topic_router")
    verdict = state.get("on_topic", "").strip().lower()
    if verdict != "yes":
        print("Routing to off_topic_response")
        return "off_topic_response"
    print("Routing to retrieve")
    return "retrieve"

def retrieve(state: AgentState):
    """Fetch documents for the rephrased question and store them in ``state``.

    Queries the notebook-level ``retriever`` with
    ``state["rephrased_question"]`` and writes the result to
    ``state["documents"]``.

    Returns:
        The same ``state`` mapping, updated in place.
    """
    print("Entering retrieve")
    hits = retriever.invoke(state["rephrased_question"])
    state["documents"] = hits
    print(f"retrieve: Retrieved {len(hits)} documents")
    return state

class GradeDocument(BaseModel):
    # Structured-output schema requested from the model in retrieval_grader,
    # once per retrieved document.
    # NOTE(review): documented with comments rather than a class docstring on
    # purpose — a docstring on a schema class may be forwarded to the model as
    # its description; confirm before adding one.
    #
    # `score` is expected to be 'Yes' or 'No'; retrieval_grader strips and
    # lowercases it and keeps the document only when the result is "yes".
    score: str = Field(
        description="Document is relevant to the question? If yes -> 'Yes' if not -> 'No'"
    )

def retrieval_grader(state: AgentState):
    """Keep only the retrieved documents the model judges relevant.

    Each document in ``state["documents"]`` is graded separately against
    ``state["rephrased_question"]`` using ``GradeDocument`` as the
    structured-output schema. Documents whose score is "yes"
    (case-insensitive, whitespace ignored) replace ``state["documents"]``, and
    ``state["proceed_to_generate"]`` records whether any survived.

    Returns:
        The same ``state`` mapping, updated in place.
    """
    print("Entering retrieval_grader")

    system_message = SystemMessage(
        content = """You are a classifier. Your task is to determine whether a retrieved document is relevant to a user's question.
        Based on whether the document is relevant to the user's question, you will determine the appropriate value for the 'score' field in the output schema.
        """
    )
    structured_llm = ChatGroq(model='gemma2-9b-it').with_structured_output(GradeDocument)

    kept = []
    for candidate in state['documents']:
        human_message = HumanMessage(
            content = f"User Question: {state['rephrased_question']}\n\nRetrieved Documents:\n{candidate.page_content}"
        )
        # One model call per document; the prompt has no template variables.
        grade_prompt = ChatPromptTemplate.from_messages([system_message, human_message])
        verdict = (grade_prompt | structured_llm).invoke({}).score.strip()
        print(
            f"Grading document: {candidate.page_content[:30]}... Result: {verdict}"
        )
        if verdict.lower() == "yes":
            kept.append(candidate)

    state["documents"] = kept
    state["proceed_to_generate"] = bool(kept)
    print(f"retrieval_grader: proceed_to_generate = {state['proceed_to_generate']}")
    return state

        









